In [23]:
import torch
import torch.nn as nn
import cv2
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torch.cuda.amp import GradScaler

In [24]:
# def crop_image(tensor, target):
#     tensor_size = tensor.size()[2]
#     target_size = target.size()[2]
#     delta = tensor_size - target_size
#     delta = delta // 2
#     return tensor[:, :, delta:tensor_size-delta, delta:tensor_size-delta]

def crop_image(tensor, target):
    """Return ``tensor`` with the same shape as ``target``, resizing if needed.

    Despite the name, nothing is cropped: when the two shapes differ,
    ``tensor`` is resized with ``TF.resize`` to the size given by
    ``target.shape[2:]``. When the shapes already agree, the very same
    tensor object is handed back untouched.
    """
    if tensor.shape == target.shape:
        return tensor
    return TF.resize(tensor, size=target.shape[2:])

def double_conv(in_ch, out_ch):
    """Build two stacked 3x3 convolution stages.

    Each stage is Conv2d (kernel 3, stride 1, padding 1) -> BatchNorm2d ->
    ReLU(inplace=True). The first stage maps ``in_ch`` to ``out_ch``
    channels; the second keeps ``out_ch`` channels.

    Returns:
        nn.Sequential holding the six layers in order.
    """
    layers = []
    # First pass consumes ``in_ch`` channels, second pass consumes ``out_ch``.
    for stage_in in (in_ch, out_ch):
        layers.append(
            nn.Conv2d(stage_in, out_ch, kernel_size=3, stride=1, padding=1)
        )
        layers.append(nn.BatchNorm2d(out_ch))
        layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)

class UNET(nn.Module):
    """U-Net style encoder/decoder producing a per-pixel logit map.

    The encoder applies five ``double_conv`` stages (64, 128, 256, 512, 1024
    channels) separated by 2x2 max pooling. The decoder upsamples four times
    with transposed convolutions, concatenating the matching encoder output
    (resized by ``crop_image`` to the upsampled map's shape) before each
    ``double_conv``. A final 1x1 convolution maps 64 channels to
    ``out_channels``.

    Args:
        in_channels: Number of channels of the input image (default 3).
        out_channels: Number of output channels/logit maps (default 1).

    Note:
        ``MaxPool2d`` floors odd spatial sizes, so for inputs whose size is
        not divisible by 16 the output can be spatially smaller than the
        input; callers are expected to resize the output if they need an
        exact match.
    """

    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()

        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder stages (attribute names are kept stable so that existing
        # checkpoints keep loading).
        self.double_conv_1 = double_conv(in_channels, 64)
        self.double_conv_2 = double_conv(64, 128)
        self.double_conv_3 = double_conv(128, 256)
        self.double_conv_4 = double_conv(256, 512)
        self.double_conv_5 = double_conv(512, 1024)

        # Decoder stages: each up_conv takes the upsampled map concatenated
        # with the skip connection, hence twice the channels on input.
        self.up_trans_1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.up_conv_1 = double_conv(1024, 512)

        self.up_trans_2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.up_conv_2 = double_conv(512, 256)

        self.up_trans_3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.up_conv_3 = double_conv(256, 128)

        self.up_trans_4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.up_conv_4 = double_conv(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, image):
        """Run the network on ``image`` and return raw (un-activated) logits."""
        # Encoder: keep every stage's output for the skip connections.
        skip_1 = self.double_conv_1(image)
        skip_2 = self.double_conv_2(self.max_pool_2x2(skip_1))
        skip_3 = self.double_conv_3(self.max_pool_2x2(skip_2))
        skip_4 = self.double_conv_4(self.max_pool_2x2(skip_3))

        # Bottleneck
        x = self.double_conv_5(self.max_pool_2x2(skip_4))

        # Decoder: upsample, align the skip tensor to the upsampled map,
        # concatenate along channels, then convolve.
        x = self.up_trans_1(x)
        x = self.up_conv_1(torch.cat([x, crop_image(skip_4, x)], dim=1))

        x = self.up_trans_2(x)
        x = self.up_conv_2(torch.cat([x, crop_image(skip_3, x)], dim=1))

        x = self.up_trans_3(x)
        x = self.up_conv_3(torch.cat([x, crop_image(skip_2, x)], dim=1))

        x = self.up_trans_4(x)
        x = self.up_conv_4(torch.cat([x, crop_image(skip_1, x)], dim=1))

        return self.final_conv(x)


In [26]:
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np

class WatermarkDataset(Dataset):
    """Dataset of (image, mask) pairs read from two directories.

    Every entry of ``image_dir`` is treated as one sample. The mask for a
    sample is looked up in ``mask_dir`` under the image's file name with
    ``'train2014'`` replaced by ``'train2014_mask'``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the masks.
        transform: Optional callable invoked as
            ``transform(image=image, mask=mask)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible between runs and machines.
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, mask)``.

        Without a transform, ``image`` is an RGB uint8 array and ``mask`` a
        float32 array in which pixels equal to 255 have been set to 1.0
        (other values are left as they are).
        """
        file_name = self.images[index]
        img_path = os.path.join(self.image_dir, file_name)
        mask_path = os.path.join(
            self.mask_dir, file_name.replace('train2014', 'train2014_mask')
        )

        # Context managers close the underlying files deterministically.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(mask_path) as mask_img:
            mask = np.array(mask_img.convert("L"), dtype=np.float32)
        # NOTE(review): only exact 255 is mapped to 1.0; anti-aliased or
        # compressed masks with intermediate grey values keep those values.
        mask[mask == 255.0] = 1.0

        # Ensure mask has the same dimensions as the image. Nearest-neighbour
        # interpolation keeps label values intact; the default bilinear mode
        # would blend them into fractional values along edges.
        if image.shape[:2] != mask.shape:
            mask = cv2.resize(
                mask,
                (image.shape[1], image.shape[0]),
                interpolation=cv2.INTER_NEAREST,
            )

        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations['image']
            mask = augmentations['mask']

        return image, mask

# Done


In [35]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt

# --- Runtime ---------------------------------------------------------------
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Optimisation ----------------------------------------------------------
LEARNING_RATE = 1e-5
NUM_EPOCHS = 50
BATCH_SIZE = 8
NUM_WORKERS = 2

# --- Input geometry --------------------------------------------------------
# Square inputs: both sides are resized to the same length.
IMAGE_HEIGHT = IMAGE_WIDTH = 572

# --- Checkpointing ---------------------------------------------------------
# When True, main() loads "my_checkpoint.pth.tar" before training starts.
LOAD_MODEL = True

# --- Data locations --------------------------------------------------------
TRAIN_IMG_DIR = '/kaggle/input/watermark-extraction/Datasets/images'
TRAIN_MASK_DIR = '/kaggle/input/watermark-extraction/Datasets/watermarks'
VAL_IMG_DIR = '/kaggle/input/watermark-extraction/Datasets/val_images'
VAL_MASK_DIR = '/kaggle/input/watermark-extraction/Datasets/val_watermarks'

def train_fn(loader, model, optimizer, loss_fn, scaler):
    """Train ``model`` for one pass over ``loader`` using scaled gradients.

    Args:
        loader: Iterable yielding ``(data, targets)`` batches. ``targets`` is
            cast to float and given a channel axis at position 1.
        model: Network whose output is compared against ``targets``.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar.
        scaler: Gradient scaler providing ``scale``, ``step`` and ``update``.

    Returns:
        The loss tensor of the last processed batch, or ``None`` when
        ``loader`` yields no batches.
    """
    loss = None  # stays None if the loader is empty
    loop = tqdm(loader)
    for data, targets in loop:
        data = data.to(device=DEVICE)
        targets = targets.float().unsqueeze(1).to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            # The network output may be spatially smaller than the target;
            # resize to the target's own size rather than a fixed constant so
            # the function works for any configured image size.
            if predictions.shape[-2:] != targets.shape[-2:]:
                predictions = F.interpolate(
                    predictions,
                    size=targets.shape[-2:],
                    mode='bilinear',
                    align_corners=True,
                )
            loss = loss_fn(predictions, targets)

        # backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # update tqdm loop
        loop.set_postfix(loss=loss.item())
    return loss

def main():
    """Build the data pipeline and model, then train for ``NUM_EPOCHS`` epochs.

    After every epoch the model and optimizer state are checkpointed, the
    validation accuracy is reported, and predictions for the validation set
    are written to ``saved_images/``. When ``LOAD_MODEL`` is set, model
    weights are first restored from ``my_checkpoint.pth.tar``.
    """
    # Training pipeline: resize, random geometric augmentation, scale pixel
    # values by 1/255 (mean 0, std 1), convert to tensors.
    train_transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Rotate(limit=35, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    # Validation pipeline: same as training but without random augmentation.
    val_transform = A.Compose(
        [
            A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
            A.Normalize(
                mean=[0.0, 0.0, 0.0],
                std=[1.0, 1.0, 1.0],
                max_pixel_value=255.0,
            ),
            ToTensorV2(),
        ],
    )

    model = UNET().to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transform,
        NUM_WORKERS,
    )

    if LOAD_MODEL:
        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine (and vice versa) instead of failing in torch.load.
        # NOTE(review): only model weights are restored here; the optimizer
        # state stored in the checkpoint is not loaded.
        load_checkpoint(
            torch.load("my_checkpoint.pth.tar", map_location=DEVICE), model
        )

    # Baseline metrics before any (further) training.
    check_accuracy(val_loader, model, device=DEVICE)
    scaler = GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to the folder
        save_predictions_as_imgs(
            val_loader, model, folder='saved_images/', device=DEVICE
        )




In [28]:
import torchvision
from torch.utils.data import DataLoader

def save_checkpoint(state, filename='my_checkpoint.pth.tar'):
    """Announce the save on stdout, then serialize ``state`` to ``filename``.

    Args:
        state: Object to persist (passed straight to ``torch.save``).
        filename: Destination path; defaults to ``my_checkpoint.pth.tar``.
    """
    banner = "===> Saving the checkpoint...."
    print(banner)
    torch.save(obj=state, f=filename)

def load_checkpoint(checkpoint, model):
    """Announce the load on stdout, then copy saved weights into ``model``.

    Args:
        checkpoint: Mapping whose ``'state_dict'`` entry holds the weights.
        model: Module receiving the weights via ``load_state_dict``.
    """
    print("===> Loading the checkpoint....")
    saved_weights = checkpoint['state_dict']
    model.load_state_dict(saved_weights)

def get_loaders(
    train_img_dir,
    train_mask_dir,
    val_img_dir,
    val_mask_dir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
):
    """Create the training and validation data loaders.

    Both loaders wrap a ``WatermarkDataset`` and share ``batch_size`` and
    ``num_workers``; only the training loader shuffles its samples.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """

    def _build(img_dir, mask_dir, transform, shuffle):
        # One dataset + loader pair; the split-specific bits come in as args.
        dataset = WatermarkDataset(
            image_dir=img_dir,
            mask_dir=mask_dir,
            transform=transform,
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=shuffle,
        )

    train_loader = _build(train_img_dir, train_mask_dir, train_transform, True)
    val_loader = _build(val_img_dir, val_mask_dir, val_transform, False)
    return train_loader, val_loader

def check_accuracy(loader, model, device=DEVICE):
    """Evaluate ``model`` on ``loader`` and print pixel accuracy and Dice score.

    The model is switched to eval mode for the evaluation and back to train
    mode afterwards. Logits are resized to the target's spatial size *before*
    thresholding so that predictions stay strictly binary.

    Args:
        loader: Iterable yielding ``(x, y)`` batches; ``y`` gets a channel
            axis inserted at position 1.
        model: Network returning logits for ``x``.
        device: Device the batches are moved to.

    Returns:
        Fraction of correctly classified pixels as a tensor (a zero tensor
        when ``loader`` yields no batches).
    """
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    num_batches = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            logits = model(x)
            # Resize to the target's own size instead of a fixed constant.
            if logits.shape[-2:] != y.shape[-2:]:
                logits = F.interpolate(
                    logits, size=y.shape[-2:], mode='bilinear', align_corners=True
                )
            # Threshold after resizing: interpolating an already-binarised map
            # yields fractional values that break both `==` and Dice.
            preds = (torch.sigmoid(logits) > 0.5).float()
            num_correct += (preds == y).sum()
            num_pixels += torch.numel(y)
            intersection = torch.sum(preds * y)
            dice_score += (2.0 * intersection) / (torch.sum(preds) + torch.sum(y) + 1e-8)
            num_batches += 1

    model.train()

    if num_batches == 0:
        # Nothing was evaluated; avoid dividing by zero below.
        print("check_accuracy: loader produced no batches, nothing to evaluate")
        return torch.tensor(0.0)

    print(
        f"Got {num_correct}/{num_pixels} with acc {(num_correct/num_pixels)*100:.2f}"
        )
    # Average the per-batch Dice over the batches actually seen.
    print(f"Dice Score : {dice_score.item()/num_batches}")
    return (num_correct)/(num_pixels)



In [29]:
import os
import torch
import torchvision
from torchvision.utils import save_image

def save_predictions_as_imgs(loader, model, folder='saved_images/', device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """Write thresholded predictions and ground-truth masks as PNG files.

    For batch number ``idx`` of ``loader`` two files are written into
    ``folder``: ``pred_{idx}.png`` (sigmoid output thresholded at 0.5) and
    ``gt_{idx}.png`` (the target with a channel axis added). The model is put
    in eval mode while predicting and switched back to train mode at the end.

    Args:
        loader: Iterable yielding ``(x, y)`` batches.
        model: Network returning logits for ``x``.
        folder: Output directory; created if it does not exist.
        device: Device the inputs are moved to.
    """
    model.eval()

    # exist_ok avoids the check-then-create race and is a no-op when present.
    os.makedirs(folder, exist_ok=True)

    with torch.no_grad():
        for idx, (x, y) in enumerate(loader):
            x = x.to(device)
            preds = (torch.sigmoid(model(x)) > 0.5).float()
            # os.path.join copes with `folder` having a trailing separator.
            save_image(preds, os.path.join(folder, f"pred_{idx}.png"))
            save_image(y.unsqueeze(1), os.path.join(folder, f"gt_{idx}.png"))

    model.train()

In [None]:
# Run the full pipeline: build loaders and model, then train, checkpoint and
# evaluate once per epoch. Long-running.
main()

In [None]:
import shutil

# Move the trained checkpoint into a dedicated output folder.
# save_checkpoint() writes 'my_checkpoint.pth.tar' relative to the current
# working directory (assumed to be /kaggle/working here — TODO confirm), so
# that file is the source and the freshly created folder is the destination.
# NOTE(review): the previous version moved *from* output_folder, which it had
# just created empty, so the move could only fail on a fresh session.
output_dir = '/kaggle/working/output_folder/'
os.makedirs(output_dir, exist_ok=True)
source_file = '/kaggle/working/my_checkpoint.pth.tar'
destination = '/kaggle/working/output_folder/my_checkpoint.pth.tar'

if os.path.exists(source_file):
    shutil.move(source_file, destination)
else:
    print(f"No checkpoint found at {source_file}; nothing to move.")
