In [None]:
# Install the segmentation_models package (imported further down as `sm`).
!pip install segmentation_models

In [None]:
# `os` is imported here because this cell runs before the main import cell;
# without it a fresh "Restart & Run All" stops with a NameError.
import os

# These variables are set before TensorFlow / segmentation_models are imported
# in the following cell, so the libraries see them when they are loaded.
os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1, 2, 3'  # GPUs exposed to this process
os.environ["SM_FRAMEWORK"] = "tf.keras"  # segmentation_models should build on tf.keras
os.environ['TF_DISABLE_LAYOUT_OPTIMIZATION'] = '1'

In [None]:
"""
importing the necessary libraries
"""


import os
import sys
import json

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import tensorflow as tf
import segmentation_models as sm
import albumentations as A
import numpy as np
import cv2

import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler
from scipy.ndimage import binary_fill_holes
from tensorflow.keras import layers
from PIL import Image



In [None]:
# Use only the first detected GPU and let TensorFlow grow its memory on demand
# instead of reserving it all up front. Nothing is changed when no GPU is found.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    primary_gpu = physical_devices[0]
    tf.config.experimental.set_memory_growth(primary_gpu, True)
    tf.config.set_visible_devices(primary_gpu, 'GPU')

In [None]:
def lr_schedule(epoch):
    """Return the learning rate to use for a given (0-based) epoch.

    Step schedule:
      * epochs 0-19  -> 1e-4
      * epochs 20-59 -> 1e-5
      * epochs 60+   -> 1e-6

    Args:
        epoch: Index of the epoch about to start.

    Returns:
        The learning rate as a float.
    """
    if epoch < 20:
        return 0.0001
    # Epoch 20 belongs to the middle step; a strict `> 20` comparison would
    # send exactly that epoch to the final branch.
    if epoch < 60:
        return 0.00001
    return 0.000001

# Wrap the schedule in a Keras callback; it is handed to model.fit further down.
lr_scheduler = LearningRateScheduler(lr_schedule)

In [None]:
class CustomLoss(tf.keras.losses.Loss):
    """Weighted sum of Dice loss and binary cross-entropy for binary segmentation.

    loss = dice_weight * Dice(y_true, y_pred) + bce_weight * BCE(y_true, y_pred)

    Args:
        name: Name of the loss instance.
        dice_weight: Multiplier applied to the Dice term (default 0.7).
        bce_weight: Multiplier applied to the binary cross-entropy term
            (default 0.3).
        **kwargs: Forwarded unchanged to ``tf.keras.losses.Loss``.
    """

    def __init__(self, name='custom_loss', dice_weight=0.7, bce_weight=0.3, **kwargs):
        super().__init__(name=name, **kwargs)
        self.dice_weight = dice_weight
        self.bce_weight = bce_weight
        self.diceloss = sm.losses.DiceLoss()
        self.binloss = sm.losses.BinaryCELoss()

    def call(self, y_true, y_pred):
        """Compute the weighted Dice + BCE loss for one batch."""
        dice = self.diceloss(y_true, y_pred)
        bce = self.binloss(y_true, y_pred)
        return dice * self.dice_weight + bce * self.bce_weight

    def get_config(self):
        """Return the constructor arguments so the loss can be re-created."""
        config = super().get_config()
        config.update({
            'dice_weight': self.dice_weight,
            'bce_weight': self.bce_weight,
        })
        return config


In [None]:
# Dice (F-score) metric reported during training, and the combined Dice + BCE
# objective that the model is compiled with.
dice_metric = sm.metrics.FScore(smooth=1e-5)
custom_loss = CustomLoss()

In [None]:
# U-Net with an EfficientNet-B1 encoder and a single sigmoid output channel.
BACKBONE = 'efficientnetb1'
preprocess_input = sm.get_preprocessing(BACKBONE)
model = sm.Unet(BACKBONE, classes=1, activation='sigmoid')

# Compile with the combined loss; IoU and Dice are tracked as metrics.
segmentation_metrics = [sm.metrics.iou_score, dice_metric]
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer=optimizer, loss=custom_loss, metrics=segmentation_metrics)


In [None]:
"""
The code below is taken directly from one of the competition participants who was kind enough to share it.
"""

class Acquisition:
    """Small helper for loading the competition's CSV and JSON-lines files."""

    def get_dataframe(self, path):
        """Read the CSV file at ``path`` into a DataFrame."""
        return pd.read_csv(path)

    # Backward-compatible alias: existing cells call the original, misspelled name.
    get_datframe = get_dataframe

    def get_json_dataframe(self, json_file):
        """Read a JSON-lines file (one JSON object per line) into a DataFrame.

        Blank or whitespace-only lines are skipped instead of being passed
        to ``json.loads``, which would raise on them.

        Args:
            json_file: Path of the ``.jsonl`` file.

        Returns:
            A DataFrame with one row per JSON object.
        """
        with open(json_file, 'r') as file:
            records = [json.loads(line) for line in file if line.strip()]
        return pd.DataFrame(records)


acq = Acquisition()


In [None]:
# Load tile_meta.csv and preview its first rows.
title=acq.get_datframe('/kaggle/input/hubmap-hacking-the-human-vasculature/tile_meta.csv')
title.head()

In [None]:
# Load wsi_meta.csv and preview its first rows.
wsi = acq.get_datframe(path='/kaggle/input/hubmap-hacking-the-human-vasculature/wsi_meta.csv')
wsi.head()

In [None]:
# Load the JSON-lines annotation file (one JSON object per line) and preview it.
polygons_df = acq.get_json_dataframe('/kaggle/input/hubmap-hacking-the-human-vasculature/polygons.jsonl')
polygons_df.head()

In [None]:
"""
We split the annotations from the JSON file into individual records:
each annotation becomes its own row in the resulting dataframe.
"""

def separate_annotations(dataset):
    """Flatten per-image annotation lists into one row per annotation.

    Args:
        dataset: DataFrame with an ``id`` column and an ``annotations`` column
            whose cells are lists of dicts carrying ``type`` and
            ``coordinates`` keys.

    Returns:
        A DataFrame with columns ``id``, ``type`` and ``coordinates`` and a
        fresh 0..n-1 index, holding one row per annotation. Images without
        annotations contribute no rows.
    """
    # Collect all rows first and build the frame once; appending to a
    # DataFrame row by row re-allocates it on every insert.
    records = [
        {
            "id": image_id,
            "type": annotation["type"],
            "coordinates": annotation["coordinates"],
        }
        for image_id, annotations in zip(dataset["id"], dataset["annotations"])
        for annotation in annotations
    ]
    return pd.DataFrame(records, columns=["id", "type", "coordinates"])

# One row per annotation; the image-loading code below looks polygons up in this frame.
separated_polygons_df  = separate_annotations(polygons_df)
separated_polygons_df.head()

In [None]:

def get_augmentation(p=1.0):
    """Build the albumentations pipeline used for training tiles.

    The pipeline chains flips, 90-degree rotations and a shift/scale/rotate
    step, then at most one distortion/blur/noise transform and at most one
    colour transform (each of those two groups is applied with probability 0.3).

    Args:
        p: Probability of applying the pipeline as a whole.

    Returns:
        An ``A.Compose`` object callable as ``pipeline(image=..., mask=...)``.
    """
    geometric_steps = [
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.RandomRotate90(),
        A.ShiftScaleRotate(
            shift_limit=0.0625,
            scale_limit=0.2,
            rotate_limit=15,
            border_mode=cv2.BORDER_REFLECT,
            p=0.60,
        ),
    ]

    # Exactly one of these is picked whenever the group fires.
    distortion_or_noise = A.OneOf(
        [
            A.ElasticTransform(p=.3),
            A.GaussianBlur(p=.3),
            A.GaussNoise(p=.3),
            A.OpticalDistortion(p=0.3),
            A.GridDistortion(p=.1),
        ],
        p=0.3,
    )

    colour_adjustment = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=15, sat_shift_limit=25, val_shift_limit=0),
            A.CLAHE(clip_limit=2),
            A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3),
        ],
        p=0.3,
    )

    return A.Compose([*geometric_steps, distortion_or_noise, colour_adjustment], p=p)

In [None]:
def preprocess_image(file_name, image_size=(512, 512), augmentation=None):
    """Load one training tile and rasterise its blood-vessel polygons into a mask.

    Args:
        file_name: Tile id, with or without a ``.tif``/``.tiff`` extension.
        image_size: ``(rows, cols)`` of the mask to create; expected to match
            the tile size.
        augmentation: Optional callable invoked as
            ``augmentation(image=..., mask=...)`` that returns a dict with
            ``"image"`` and ``"mask"`` entries (e.g. ``get_augmentation()``).

    Returns:
        ``(image, mask)``: the image as read by ``cv2.imread`` (uint8) and the
        mask with a trailing channel axis added, where blood-vessel pixels
        are 1 and all others 0.

    Raises:
        FileNotFoundError: If the tile cannot be read from disk.
    """
    # Accept "abc" as well as "abc.tif"; ids in separated_polygons_df never
    # carry the extension, so it is stripped for the lookup.
    stem, extension = os.path.splitext(file_name)
    if extension.lower() in ('.tif', '.tiff'):
        image_id = stem
        path = '/kaggle/input/hubmap-hacking-the-human-vasculature/train/{}'.format(file_name)
    else:
        image_id = file_name
        path = '/kaggle/input/hubmap-hacking-the-human-vasculature/train/{}.tif'.format(file_name)

    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('Could not read image: {}'.format(path))
    # NOTE(review): cv2.imread yields channels in BGR order and the image is
    # passed on unchanged — confirm this is what downstream code expects.
    image = np.asarray(image, dtype=np.uint8)

    mask = np.zeros(image_size, dtype=np.uint8)
    is_vessel_of_image = (
        (separated_polygons_df["id"] == image_id)
        & (separated_polygons_df["type"] == "blood_vessel")
    )
    for coordinates in separated_polygons_df.loc[is_vessel_of_image, "coordinates"]:
        # Vertices are (x, y) pairs, i.e. (column, row). fillPoly takes points
        # in that order, so the mask stays aligned with the image; indexing
        # the array as mask[x, y] would transpose it. Only the first ring of
        # each polygon is drawn.
        outline = np.asarray(coordinates[0], dtype=np.int32)
        cv2.fillPoly(mask, [outline], 1)
    mask = mask.astype(np.float32)

    # Apply data augmentation if provided
    if augmentation is not None:
        augmented = augmentation(image=image, mask=mask)
        image, mask = augmented["image"], augmented["mask"]

    return image, np.expand_dims(mask, axis=-1)


# Visual sanity check: load one annotated tile with augmentation and show it
# next to its mask.
sample_image = separated_polygons_df["id"][1]
image, mask = preprocess_image(sample_image, augmentation=get_augmentation(p=1.0))
image = image/255.0
mask = mask.astype(np.uint8)
print(image.shape)
print(mask.shape)

# Display the image and mask
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

# cv2 loads channels as BGR while matplotlib expects RGB, so the channel axis
# is reversed for display only.
axes[0].imshow(image[..., ::-1])
axes[0].set_title("Image")
axes[0].axis("off")
# Drop the trailing channel axis so imshow receives a plain 2-D array.
axes[1].imshow(mask[..., 0])
axes[1].set_title("Mask")
axes[1].axis("off")
plt.show()

print("Done")

In [None]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields augmented training batches.

    The unique image ids of ``df`` are split into train / validation / test
    index sets. Batches served through ``__getitem__`` come from the training
    split only, so the arrays returned by ``get_validation_data`` and
    ``get_test_data`` stay unseen during training.

    Args:
        df: DataFrame with an ``id`` column (one or more rows per image).
        batch_size: Number of images per training batch.
        input_size: Stored on the instance; not used by the loader itself.
        shuffle: Whether to shuffle when splitting and to reshuffle the
            training indices after every epoch.
        test_size: Share of all images held out as the test split.
        val_size: Share of the remaining (non-test) images held out for
            validation.
        random_state: Optional seed passed to ``train_test_split`` to make the
            split reproducible.
    """

    def __init__(self, df,batch_size, input_size=(512, 512, 3),shuffle=True ,test_size=0.1, val_size=0.1,
                 random_state=None):
        super().__init__()
        self.all_unique_images = df["id"].unique()
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

        train_val_indices, self.test_indices = train_test_split(
            np.arange(len(self.all_unique_images)),
            test_size=test_size,
            shuffle=shuffle,
            random_state=random_state,
        )

        # Split the remaining train_val_indices into train and validation sets
        self.train_indices, self.val_indices = train_test_split(
            train_val_indices,
            test_size=val_size,
            shuffle=shuffle,
            random_state=random_state,
        )

        # Built once and reused; the pipeline draws new random parameters on
        # every call, so it does not need to be re-created per image.
        self.augmentation = get_augmentation(p=1.0)

    def __len__(self):
        """Number of training batches per epoch (the last one may be smaller)."""
        return int(np.ceil(len(self.train_indices) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``index``-th augmented training batch as ``(images, masks)``."""
        start = index * self.batch_size
        batch_indices = self.train_indices[start:start + self.batch_size]
        return self._load_images(batch_indices, augmentation=self.augmentation)

    def on_epoch_end(self):
        """Reshuffle the training indices between epochs when shuffling is on."""
        if self.shuffle:
            np.random.shuffle(self.train_indices)

    def _load_images(self, indices, augmentation=None):
        """Load the images at ``indices`` as float32 arrays, images scaled to [0, 1]."""
        images = []
        masks = []
        for each_index in indices:
            image, mask = preprocess_image(self.all_unique_images[each_index], augmentation=augmentation)
            images.append(image)
            masks.append(mask)

        images = np.asarray(images, dtype=np.float32) / 255.0
        masks = np.asarray(masks, dtype=np.float32)
        return images, masks

    def get_validation_data(self):
        """Return the un-augmented validation split as ``(images, masks)``."""
        return self._load_images(self.val_indices)

    def get_test_data(self):
        """Return the un-augmented test split as ``(images, masks)``."""
        return self._load_images(self.test_indices)
    

In [None]:
# Build the generator: 10% of the images are held out for testing and 15% of
# the remainder for validation. Both held-out splits are loaded into memory.
data_generator = CustomDataGen(separated_polygons_df, batch_size=8, test_size=0.1, val_size=0.15)
val_images, val_masks = data_generator.get_validation_data()  # Accessing the validation data
test_images, test_masks = data_generator.get_test_data() 

In [None]:
# total_batches = len(data_generator)  # Get the total number of batches

# for batch_index in range(total_batches):
#     X_batch, y_batch = data_generator[batch_index]  # Retrieve a batch of data

#     print(f"Batch {batch_index+1}/{total_batches}")
#     print(f"X_batch shape: {X_batch.shape}")
#     print(f"y_batch shape: {y_batch.shape}")

In [None]:
# Write the model to best_model.h5 only when the validation loss improves.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)


# Train for 100 epochs on batches from the generator, validating on the
# pre-loaded validation arrays; the learning rate follows lr_scheduler.
history = model.fit(data_generator,
                    epochs=100,
                    validation_data=(val_images, val_masks),
                    callbacks=[checkpoint, lr_scheduler])

# Train the model

# Evaluate the model


In [None]:
# Training versus validation loss over the epochs recorded by model.fit.
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Loss per epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
# Absolute path of the competition's test tiles, matching the other
# /kaggle/input/... paths in this notebook. Prefixing it with sys.path[0]
# would point at a directory underneath whatever that entry happens to be.
test_dir = '/kaggle/input/hubmap-hacking-the-human-vasculature/test/'

# Accumulators for the submission columns, filled by the inference cell.
submission = pd.DataFrame()
ids = []
h = []
w = []
pred_strings = []


In [None]:
# Sigmoid probability above which a pixel is counted as blood vessel.
MASK_THRESHOLD = 0.5

# Start from empty accumulators so that re-running this cell does not append
# duplicate rows to the submission.
images_test = []
masks_test = []
predictions = []
ids, h, w, pred_strings = [], [], [], []

for each_image in sorted(os.listdir(test_dir)):
    image = cv2.imread(os.path.join(test_dir, each_image))
    if image is None:
        # cv2.imread returns None for anything it cannot decode; skip it.
        continue
    image = np.asarray(image, dtype=np.uint8)
    height, width, channels = image.shape

    # Scale to [0, 1] exactly as the training generator does before predicting.
    model_input = np.expand_dims(image.astype(np.float32) / 255.0, axis=0)
    predicted_mask = model.predict(model_input)[0, ..., 0]
    predictions.append(predicted_mask)

    # Split the thresholded mask into connected regions, one boolean mask per
    # region (label 0 is the background).
    binary_mask = (predicted_mask > MASK_THRESHOLD).astype(np.uint8)
    num_labels, labels = cv2.connectedComponents(binary_mask)
    objects = [labels == label for label in range(1, num_labels)]

    ids.append(os.path.splitext(each_image)[0])
    h.append(height)
    w.append(width)

    ## Get prediction_string
    # NOTE(review): get_pred_string is not defined in the visible part of this
    # notebook and must be provided by another cell. It is assumed here to
    # accept a list of per-object boolean masks — confirm against its
    # definition before submitting.
    pred_strings.append(get_pred_string(objects))


submission = pd.DataFrame({
    "id": ids,
    "height": h,
    "width": w,
    "prediction_string": pred_strings,
}).set_index("id")
submission.to_csv("submission.csv")

