In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the water-bodies dataset through kagglehub and keep whatever it returns.
# NOTE(review): the data-loading cell further down globs a hard-coded
# '../input/...' location and never reads this variable -- confirm both refer
# to the same files in the environment this notebook is run in.
franciscoescobar_satellite_images_of_water_bodies_path = kagglehub.dataset_download('franciscoescobar/satellite-images-of-water-bodies')

print('Data source import complete.')

In [None]:
!python -V

In [None]:
!pip install --upgrade pip

In [None]:
!pip install --upgrade opencv-contrib-python

In [None]:
!pip install segmentation-models-pytorch

In [None]:
!pip install albumentations

In [None]:
import os
import glob

import albumentations as A
from albumentations import HorizontalFlip, VerticalFlip, Rotate
import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.losses import DiceLoss
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

torch.cuda.is_available()

## Load the data

In [None]:
# Candidate dataset roots, in order of preference: the Kaggle-mounted input
# folder this notebook was written against, then the directory returned by
# kagglehub in the first cell (only if that cell was run).
# NOTE(review): the kagglehub directory is assumed to contain the same
# 'Water Bodies Dataset' folder -- it is only used if that folder exists there.
_dataset_roots = [
    '../input/satellite-images-of-water-bodies',
    globals().get('franciscoescobar_satellite_images_of_water_bodies_path'),
]
_dataset_dirs = [
    os.path.join(root, 'Water Bodies Dataset') for root in _dataset_roots if root
]
_dataset_dir = next((d for d in _dataset_dirs if os.path.isdir(d)), None)
if _dataset_dir is None:
    raise FileNotFoundError(
        f"'Water Bodies Dataset' folder not found; looked in: {_dataset_dirs}"
    )

# Sorting both listings is what pairs image i with mask i further down.
image_paths = sorted(glob.glob(os.path.join(_dataset_dir, 'Images', '*')))
mask_paths = sorted(glob.glob(os.path.join(_dataset_dir, 'Masks', '*')))

# Fail here, with a clear message, rather than later inside the training loop.
if not image_paths or len(image_paths) != len(mask_paths):
    raise ValueError(
        f'Expected matching, non-empty image/mask listings, got '
        f'{len(image_paths)} images and {len(mask_paths)} masks in {_dataset_dir}'
    )

In [None]:
image_paths[0]

In [None]:
len(image_paths)

In [None]:
len(mask_paths)

In [None]:
# Preview the first image next to its mask, exactly as stored on disk.
img = Image.open(image_paths[0])
mask = Image.open(mask_paths[0])

# Explicit axes (instead of the implicit pyplot "current axes") with titles,
# so the figure can be read on its own.
fig, (ax_img, ax_mask) = plt.subplots(1, 2)
ax_img.imshow(img)
ax_img.set_title('Image')
ax_mask.imshow(mask)
ax_mask.set_title('Mask')
plt.show()

## Image Processing Pipeline

In [None]:
height,width = (512, 512)

In [None]:
img = Image.open(image_paths[0])
mask = Image.open(mask_paths[0]).convert('L')

In [None]:
img_np, mask_np = np.array(img), np.array(mask)

img_np.shape

In [None]:
mask_np.shape

In [None]:
# 1. Define the pipeline
pipeline = A.Compose([
    A.Resize(height,width),
    A.HorizontalFlip(),
    # A.HorizontalFlip(p=0.5), # 50% chance to flip
    # A.RandomBrightnessContrast(p=0.8), # 80% chance to adjust brightness/contrast
    # A.GaussianBlur(p=0.3), # 30% chance to blur
])

# 2. Apply the pipeline
# transformed_data = pipeline(image=image)
# transformed_image = transformed_data['image']

img_np_pr = pipeline(image=np.array(Image.open(image_paths[0])))['image']
mask_np_pr = pipeline(image=np.array(Image.open(mask_paths[0]).convert('L')))['image']

In [None]:
mask_np_pr.shape

In [None]:
img_np_pr.shape

In [None]:
# Move the last axis to the front, divide by 255 and wrap the result as a tensor.
img_np_pr = torch.tensor(np.transpose(img_np_pr, (2, 0, 1)) / 255.0)
img_np_pr.shape

In [None]:
# Prepend a length-1 axis, divide by 255 and wrap the result as a tensor.
mask_np_pr = torch.tensor(mask_np_pr[np.newaxis, ...] / 255.0)
mask_np_pr.shape

In [None]:
class PreprocessImage():
    """Resize, randomly flip and tensorise an image, optionally with its mask.

    The Albumentations pipeline is ``Resize(height, width)`` followed by a
    random ``HorizontalFlip``. Because the flip is random, an image and its
    mask must go through the pipeline in the same call to stay aligned; pass
    both to :meth:`run` for that.
    """

    def __init__(
        self,
        height=512,
        width=512
    ):
        self.pipeline = A.Compose([
            A.Resize(height, width),
            A.HorizontalFlip(),
        ])

    @staticmethod
    def _to_tensor(array):
        """Turn an (H, W, C) or (H, W) array into a channel-first tensor divided by 255."""
        if array.ndim == 3:  # multi-channel: move channels to the front
            array = np.transpose(array, (2, 0, 1))
        elif array.ndim == 2:  # single channel: add the channel axis
            array = np.expand_dims(array, axis=0)
        else:
            raise ValueError(f'Image needs to be 2 or 3 dimensional, the requested image has the following dimensions: {array.shape}')
        return torch.tensor(array / 255.0)

    def run(
        self,
        image_raw,
        mask_raw=None
    ):
        """Apply the pipeline and return tensor(s).

        Args:
            image_raw: anything ``np.array`` can convert (e.g. a PIL image).
            mask_raw: optional mask belonging to ``image_raw``. When given, it
                is transformed in the same pipeline call as the image, so
                both receive the same random flip.

        Returns:
            The image tensor when ``mask_raw`` is None, otherwise the tuple
            ``(image_tensor, mask_tensor)``.

        Raises:
            ValueError: if a transformed array is neither 2- nor 3-dimensional.
        """
        if mask_raw is None:
            transformed = self.pipeline(image=np.array(image_raw))
            return self._to_tensor(transformed['image'])

        transformed = self.pipeline(image=np.array(image_raw), mask=np.array(mask_raw))
        return self._to_tensor(transformed['image']), self._to_tensor(transformed['mask'])


class Load_Data(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs from two parallel path lists."""

    def __init__(self, image_list, mask_list):
        super().__init__()

        # Pairs are matched by position, so the lists must line up one-to-one.
        if len(image_list) != len(mask_list):
            raise ValueError(
                f'image_list and mask_list differ in length: {len(image_list)} vs {len(mask_list)}'
            )

        self.images_list = image_list
        self.mask_list = mask_list
        self.len = len(image_list)

        self.preprocess_image = PreprocessImage()

    def __getitem__(self, idx):
        # `with` closes the file handles; convert() returns a loaded copy.
        # RGB is forced because the model is built with in_channels=3.
        with Image.open(self.images_list[idx]) as img_file:
            img_raw = img_file.convert('RGB')
        with Image.open(self.mask_list[idx]) as mask_file:
            mask_raw = mask_file.convert('L')

        # One joint call keeps the random flip identical for image and mask.
        img, mask = self.preprocess_image.run(img_raw, mask_raw)

        return img, mask

    def __len__(self):
        return self.len

In [None]:
num = 0
img_raw = Image.open(image_paths[num])
img_raw

In [None]:
preprocess_image = PreprocessImage()

img_pr = preprocess_image.run(img_raw)
img_pr.shape

## Model Training

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the image/mask path pairs for validation (test_size=0.1);
# the fixed random_state keeps the split the same from run to run.
X = image_paths
y = mask_paths
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

In [None]:
# Build the datasets with Load_Data, the dataset class defined in this notebook.
train_dataset = Load_Data(X_train, y_train)
valid_dataset = Load_Data(X_val, y_val)

In [None]:
train_dataset = Load_Data(X_train, y_train)
valid_dataset = Load_Data(X_val, y_val)

In [None]:
img, mask = train_dataset[18]

In [None]:
# Fetch one preprocessed training sample and show it beside its mask.
img, mask = train_dataset[18]

f, axarr = plt.subplots(1,2) 
# Right panel: the mask with its length-1 axes squeezed away, drawn in grayscale.
axarr[1].imshow(np.squeeze(mask.numpy()), cmap='gray')
# Left panel: the image with its first axis moved last, as imshow expects.
axarr[0].imshow(np.transpose(img.numpy(), (1,2,0)))


**These settings are customizable; change them as you like. The choice of encoder is important, because your results will depend on it.**

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

EPOCHS = 5
BATCH_SIZE = 16
LR = 0.001

ratio = 0.5  # Threshold applied to the sigmoid output; other values may visualize better
sample_num = 2  # Index (within the first validation batch) of the sample previewed each epoch

ENCODER = 'resnet50'
WEIGHTS = 'imagenet'

**We load the data with DataLoader...**

In [None]:
# Options common to both loaders; only the shuffling differs between them.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=2)

# Training batches are reshuffled each epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_options)


**Here we determine the encoder and weights from the above variables in Unet.**

**The value of our loss function is the sum of the DiceLoss and BCEWithLogitsLoss.**

In [None]:
class SegmentationModel(nn.Module):
    """U-Net wrapper that can return its training losses alongside the logits.

    The network is ``smp.Unet`` built from the module-level ``ENCODER`` and
    ``WEIGHTS`` settings, with 3 input channels, 1 output class and no final
    activation (so it outputs raw logits).
    """

    def __init__(self):
        super(SegmentationModel,self).__init__()

        self.arc=smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=WEIGHTS,
            in_channels=3,
            classes=1,
            activation=None
        )

    def forward(self,images,masks=None):
        """Run the network.

        Args:
            images: input batch passed straight to the U-Net.
            masks: optional target masks. When given, the losses are computed.

        Returns:
            ``logits`` when ``masks`` is None, otherwise the tuple
            ``(logits, dice_loss, bce_loss)``.
        """
        logits=self.arc(images)

        # Identity check: `!=` on a tensor is a comparison operator, not a
        # None test, and only works here by special-casing.
        if masks is not None:
            loss1=DiceLoss(mode='binary')(logits,masks)
            loss2=nn.BCEWithLogitsLoss()(logits,masks)
            return logits,loss1,loss2
        return logits

In [None]:
model = SegmentationModel()
model.to(DEVICE);

**We write the function to be used for training and eval so that things are simplified in the training cycle.**

In [None]:
def train_fn(data_loader,model,optimizer):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        data_loader: iterable of ``(images, masks)`` batches.
        model: called as ``model(images, masks)``; must return
            ``(logits, dice_loss, bce_loss)``.
        optimizer: optimizer stepped once per batch.

    Returns:
        Tuple ``(mean_dice_loss, mean_bce_loss)`` averaged over the batches.
    """
    model.train()
    total_diceloss=0.0
    total_bceloss=0.0
    for images, masks in tqdm(data_loader):
        images=images.to(DEVICE, dtype=torch.float32)
        masks=masks.to(DEVICE, dtype=torch.float32)

        optimizer.zero_grad()

        _, diceloss, bceloss = model(images, masks)
        # One backward pass over the summed loss accumulates the same gradients
        # as two separate passes, without retaining the graph or walking it twice.
        (diceloss + bceloss).backward()
        optimizer.step()

        total_diceloss+=diceloss.item()
        total_bceloss+=bceloss.item()

    return total_diceloss/len(data_loader),total_bceloss/len(data_loader)

In [None]:
def eval_fn(data_loader,model):
    """Evaluate ``model`` on ``data_loader`` and plot one prediction.

    Args:
        data_loader: iterable of ``(images, masks)`` batches.
        model: called as ``model(images, masks)``; must return
            ``(logits, dice_loss, bce_loss)``.

    Returns:
        Tuple ``(mean_dice_loss, mean_bce_loss)`` averaged over the batches.

    Side effects:
        Shows a 1x3 figure (image, mask, thresholded prediction) for sample
        ``sample_num`` of the first batch, thresholded at ``ratio``.
    """
    model.eval()
    total_diceloss=0.0
    total_bceloss=0.0
    preview = None
    with torch.no_grad():
        for batch_idx, (images, masks) in enumerate(tqdm(data_loader)):
            images_dev = images.to(DEVICE, dtype=torch.float32)
            masks_dev = masks.to(DEVICE, dtype=torch.float32)

            logits, diceloss, bceloss = model(images_dev, masks_dev)
            total_diceloss+=diceloss.item()
            total_bceloss+=bceloss.item()

            if batch_idx == 0:
                # Take the preview from the loader being evaluated, reusing the
                # logits already computed (no second loader pass or forward).
                # Clamp the index so a short batch cannot raise IndexError.
                idx = min(sample_num, images.shape[0] - 1)
                pred_mask = (torch.sigmoid(logits[idx]) > ratio) * 1.0
                preview = (images[idx], masks[idx], pred_mask.cpu())

    # Visualization
    if preview is not None:
        image, mask, pred_mask = preview
        f, axarr = plt.subplots(1,3)
        axarr[0].imshow(np.transpose(image.numpy(), (1,2,0)))
        axarr[1].imshow(np.squeeze(mask.numpy()), cmap='gray')
        axarr[2].imshow(np.squeeze(pred_mask.numpy()))
        plt.show()

    return total_diceloss/len(data_loader),total_bceloss/len(data_loader)


In [None]:
# SGD with momentum 0.9 is the optimizer actually used for training.
optimizer=torch.optim.SGD(model.parameters(), lr=LR, momentum=0.9)
# Alternative kept for reference: torch.optim.Adam(model.parameters(),lr=LR)

**We use the SGD optimizer with momentum (Adam is left commented out as an alternative) and set up our training loop.**

**Here we want to save the best model and see our loss at every step...**

In [None]:
# np.inf is the supported spelling; the np.Inf alias was removed in NumPy 2.0.
best_val_dice_loss = np.inf
best_val_bce_loss = np.inf

# makedirs creates both directory levels and does nothing if they already
# exist, while real failures (e.g. permissions) surface instead of being
# reported as "File Exists!".
os.makedirs("/kaggle/working/modelPerformance/" + ENCODER, exist_ok=True)

for i in range(EPOCHS):
    train_dice, train_bce = train_fn(train_loader, model, optimizer)
    valid_dice, valid_bce = eval_fn(valid_loader, model)

    print(f'Epochs:{i+1}\nTrain_loss --> Dice: {train_dice} BCE: {train_bce} \nValid_loss --> Dice: {valid_dice} BCE: {valid_bce}')

    # Checkpoint only when the validation Dice loss improves; the loss value
    # is embedded in the file name.
    if valid_dice < best_val_dice_loss:
        torch.save(model.state_dict(),"/kaggle/working/modelPerformance/{}/model_{:.5f}dice.pt".format(ENCODER, valid_dice))
        print('Model Saved')
        best_val_dice_loss = valid_dice
        best_val_bce_loss = valid_bce

In [None]:
num=10
ratio=0.5
PATH = f"/kaggle/working/modelPerformance/{ENCODER}/"

# os.listdir returns entries in arbitrary order, so "[-1]" is not the best
# model. Checkpoints are named model_<dice>dice.pt; pick the lowest Dice loss.
_checkpoint_names = [
    name for name in os.listdir(PATH)
    if name.startswith('model_') and name.endswith('dice.pt')
]
if not _checkpoint_names:
    raise FileNotFoundError(f'No model_*dice.pt checkpoint found in {PATH}')
_best_checkpoint = min(
    _checkpoint_names,
    key=lambda name: float(name[len('model_'):-len('dice.pt')]),
)
# map_location lets the weights load on whatever DEVICE is configured.
model.load_state_dict(torch.load(PATH + _best_checkpoint, map_location=DEVICE))
model.eval()  # inference: use running batch-norm statistics

image,mask=next(iter(valid_loader))
image=image[num]
mask=mask[num]
with torch.no_grad():  # no gradients are needed for a prediction
    logits_mask=model(image.to(DEVICE, dtype=torch.float32).unsqueeze(0))
pred_mask=torch.sigmoid(logits_mask)
pred_mask=(pred_mask > ratio)*1.0

f, axarr = plt.subplots(1,3)
axarr[0].imshow(np.transpose(image.numpy(), (1,2,0)))
axarr[0].set_title('Image')
axarr[1].imshow(np.squeeze(mask.numpy()), cmap='gray')
axarr[1].set_title('Mask')
axarr[2].imshow(np.transpose(pred_mask.detach().cpu().squeeze(0).numpy(), (1,2,0)))
axarr[2].set_title('Predicted Mask')

## Inference

In [None]:
!ls /kaggle/working/modelPerformance/resnet50

In [None]:
def predict_mask(
    image,
    mask,
    model,
    threshold=None):
    """Predict a binary mask for one image and plot it.

    Args:
        image: channel-first CPU tensor for a single image (no batch axis).
        mask: ground-truth mask tensor to plot in the middle panel, or None
            to plot only the image and the prediction.
        model: network returning logits for a batch of images.
        threshold: cut-off applied to the sigmoid output. Defaults to the
            module-level ``ratio``.

    Returns:
        None. Draws a figure with 2 panels (no mask) or 3 panels (with mask).
    """
    if threshold is None:
        threshold = ratio

    # Run on whatever device the model lives on instead of assuming 'cuda'.
    device = next(model.parameters()).device

    # Predict in eval mode (a freshly constructed model starts in train mode,
    # where batch-norm would use single-image batch statistics), then restore
    # the mode the caller left the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits_mask = model(image.to(device, dtype=torch.float32).unsqueeze(0))
    finally:
        model.train(was_training)

    pred_mask = (torch.sigmoid(logits_mask) > threshold) * 1.0
    pred_np = pred_mask.cpu().squeeze(0).numpy()

    f, axarr = plt.subplots(1, 2 if mask is None else 3)
    axarr[0].imshow(np.transpose(image.numpy(), (1,2,0)))
    axarr[0].set_title('Image')
    if mask is not None:
        axarr[1].imshow(np.squeeze(mask.numpy()), cmap='gray')
        axarr[1].set_title('Mask')
    # The prediction always goes in the last panel.
    axarr[-1].imshow(np.transpose(pred_np, (1,2,0)))
    axarr[-1].set_title('Predicted Mask')
    

In [None]:
# Pick the checkpoint from what is actually on disk instead of hard-coding the
# score of one particular run. Files are named model_<dice>dice.pt; the lowest
# Dice loss wins.
_checkpoint_dir = "/kaggle/working/modelPerformance/{}/".format(ENCODER)
_checkpoint_files = [
    name for name in os.listdir(_checkpoint_dir)
    if name.startswith('model_') and name.endswith('dice.pt')
]
if not _checkpoint_files:
    raise FileNotFoundError(f'No model_*dice.pt checkpoint found in {_checkpoint_dir}')
PATH = _checkpoint_dir + min(
    _checkpoint_files,
    key=lambda name: float(name[len('model_'):-len('dice.pt')]),
)

seg_model = SegmentationModel()
seg_model.to(DEVICE);
# map_location lets the weights load on whatever DEVICE is configured.
seg_model.load_state_dict(torch.load(PATH, map_location=DEVICE))
# A new nn.Module starts in training mode; switch to eval for inference.
seg_model.eval();

preprocess_image = PreprocessImage()


In [None]:
image,mask=next(iter(valid_loader))

In [None]:
num=10

In [None]:
image[num].shape

In [None]:
mask[num].shape

In [None]:
predict_mask(
    image=image[num],
    mask=mask[num],
    model=seg_model
)

In [None]:
num=14
predict_mask(
    image=image[num],
    mask=None,
    model=seg_model
)

In [None]:
num=10

# Load raw image number `num` and run it through the preprocessing pipeline.
img_raw = Image.open(image_paths[num])
img_pr = preprocess_image.run(img_raw)

img_pr.shape

In [None]:
# Load the mask with the same index as single-channel ('L') and preprocess it
# with the same PreprocessImage instance.
# NOTE(review): the pipeline contains A.HorizontalFlip(), and the image and
# this mask go through separate run() calls, so they may not receive the same
# flip -- confirm the pair is still aligned before comparing them visually.
mask_raw = Image.open(mask_paths[num]).convert('L')
mask_pr = preprocess_image.run(mask_raw)

mask_pr.shape

In [None]:
predict_mask(
    image=img_pr,
    mask=mask_pr,
    model=seg_model
)

In [None]:
predict_mask(
    image=img_pr,
    mask=None,
    model=seg_model
)