In [None]:
!pip install kagglehub
!pip install segmentation-models-pytorch

In [None]:
import kagglehub

# Download latest version
# The value returned by dataset_download is kept in `path` and printed below
# as the location of the dataset files.
path = kagglehub.dataset_download("abdallahwagih/kvasir-dataset-for-classification-and-segmentation")

print("Path to dataset files:", path)

In [None]:
# Move version 1 of the downloaded dataset out of the kagglehub cache into the
# current working directory as ./1.
# The move is skipped when ./1 already exists, so re-running this cell (or the
# whole notebook) does not fail.
import os
import shutil

_cache_dir = '/root/.cache/kagglehub/datasets/abdallahwagih/kvasir-dataset-for-classification-and-segmentation/versions/1'
_target_dir = os.path.join('.', '1')

if os.path.isdir(_target_dir):
  print(f'{_target_dir} already exists, skipping move')
else:
  shutil.move(_cache_dir, _target_dir)

In [98]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split

In [99]:
# Directories holding the Kvasir-SEG input images and their segmentation masks.
# NOTE(review): these absolute paths assume the dataset folder "1" sits directly
# under /content - confirm this matches the notebook's working directory.
Image_path = '/content/1/kvasir-seg/Kvasir-SEG/images'
Mask_path = '/content/1/kvasir-seg/Kvasir-SEG/masks'

In [None]:
# Number of samples per mini-batch used by the data loaders.
batch_size = 16

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print(device)

In [101]:
class segmentationDataset(Dataset):
  """Dataset of (image, mask) pairs for binary segmentation.

  Each item is read from disk with OpenCV and returned as a pair of float32
  tensors: the RGB image scaled to [0, 1] with shape (3, H, W), and the mask
  binarised to {0, 1} with shape (1, H, W).

  Args:
    image_paths: list of image file paths.
    mask_paths: list of mask file paths, aligned index-by-index with
      `image_paths`.
    transform: stored on the instance as `self.transform`; it is not applied
      by `__getitem__`.
    image_size: target size passed to `cv2.resize` as (width, height).
      Defaults to (256, 256).

  Raises:
    ValueError: if the two path lists differ in length.
  """

  def __init__(self, image_paths: list, mask_paths: list, transform=None, image_size=(256, 256)):
    if len(image_paths) != len(mask_paths):
      raise ValueError(
        f'image_paths and mask_paths differ in length: '
        f'{len(image_paths)} != {len(mask_paths)}'
      )
    self.image_paths = image_paths
    self.mask_paths = mask_paths
    self.transform = transform
    self.image_size = tuple(image_size)

  def __len__(self):
    return len(self.image_paths)

  @staticmethod
  def _read(path, grayscale=False):
    """Read `path` with OpenCV, failing loudly instead of returning None."""
    if not os.path.isfile(path):
      raise FileNotFoundError(f'File not found: {path}')
    flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
    data = cv2.imread(path, flag)
    # cv2.imread signals an unreadable file by returning None.
    if data is None:
      raise ValueError(f'Could not decode image file: {path}')
    return data

  def __getitem__(self, idx):
    """Return the (image, mask) tensor pair at position `idx`."""
    image = self._read(self.image_paths[idx])
    # imread yields a 3-channel BGR array, so BGR2RGB is the matching conversion.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, self.image_size)
    image = image.astype(np.float32) / 255.0

    mask = self._read(self.mask_paths[idx], grayscale=True)
    # Nearest-neighbour resize plus thresholding keeps the target strictly
    # binary; the masks are stored as JPEG, so raw pixel values are not
    # guaranteed to be exactly 0 or 255.
    mask = cv2.resize(mask, self.image_size, interpolation=cv2.INTER_NEAREST)
    mask = (mask > 127).astype(np.float32)
    mask = np.expand_dims(mask, axis=0)

    # HWC -> CHW for the image; the mask already has its channel axis first.
    image = torch.from_numpy(image).permute(2, 0, 1)
    mask = torch.from_numpy(mask)

    return image, mask

In [102]:
# Collect image and mask files in sorted order so that position i in one list
# corresponds to position i in the other.
image_paths = sorted(glob(os.path.join(Image_path, '*jpg')))
mask_paths = sorted(glob(os.path.join(Mask_path, '*jpg')))

# Fail early with a clear message instead of training on nothing or on
# misaligned image/mask pairs.
assert image_paths, f'No images found in {Image_path}'
assert len(image_paths) == len(mask_paths), (
  f'Found {len(image_paths)} images but {len(mask_paths)} masks'
)
assert (
  [os.path.basename(p) for p in image_paths]
  == [os.path.basename(p) for p in mask_paths]
), 'Image and mask file names do not line up'

In [None]:
# Hold out 30% of the image/mask pairs for testing; the fixed random_state
# makes the split repeatable.
train_images, test_images, train_mask, test_mask = train_test_split(
  image_paths,
  mask_paths,
  test_size=0.3,
  random_state=42,
)

# Sizes of the four resulting lists, in the order they were unpacked.
tuple(len(part) for part in (train_images, test_images, train_mask, test_mask))

In [None]:
# Wrap each split's path lists in a dataset object.
train_dataset = segmentationDataset(image_paths=train_images, mask_paths=train_mask)
test_dataset = segmentationDataset(image_paths=test_images, mask_paths=test_mask)

# Number of samples in each dataset.
tuple(map(len, (train_dataset, test_dataset)))

In [None]:
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Number of batches per loader.
tuple(map(len, (train_loader, test_loader)))

In [106]:
# Fetch the first training sample (image tensor and mask tensor) so the next
# cells can inspect it.
image, mask = train_dataset[0]

In [None]:
# Show the tensor dimensions of the sample image and its mask.
print(image.shape)
print(mask.shape)

In [None]:
# Reorder the axes from channels-first to channels-last before plotting.
plt.imshow(image.permute(1, 2, 0).numpy())
plt.show()

In [None]:
# Same channels-first to channels-last reordering as for the image, done on
# the NumPy view of the mask tensor.
plt.imshow(np.transpose(mask.numpy(), (1, 2, 0)))
plt.show()

## *Model*

The model is built with the `segmentation-models-pytorch` library: https://pypi.org/project/segmentation-models-pytorch/

In [110]:
# U-Net with a ResNet-50 encoder initialised from ImageNet weights:
# 3 input channels, 1 output channel.
model = smp.Unet(
  encoder_name='resnet50',
  encoder_weights='imagenet',
  in_channels=3,
  classes=1,
).to(device)

# The loss takes raw model outputs (logits) and the target masks.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)


## Training

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
  """Run one pass over `loader` and return the mean per-sample loss.

  When `optimizer` is given the model is put in training mode and its
  parameters are updated after every batch; otherwise the model is put in
  evaluation mode and gradients are disabled.

  Raises:
    ValueError: if `loader` yields no samples.
  """
  is_training = optimizer is not None
  model.train(is_training)

  total_loss = 0.0
  total_samples = 0

  with torch.set_grad_enabled(is_training):
    for images, masks in loader:
      images = images.to(device)
      masks = masks.to(device)

      if is_training:
        optimizer.zero_grad()

      outputs = model(images)
      loss = criterion(outputs, masks)

      if is_training:
        loss.backward()
        optimizer.step()

      # Weight each batch by its size so a smaller final batch does not
      # count as much as a full one.
      batch_count = images.size(0)
      total_loss += loss.item() * batch_count
      total_samples += batch_count

  if total_samples == 0:
    raise ValueError('loader produced no samples')
  return total_loss / total_samples


best_loss = float('inf')
num_epochs = 15

for epoch in range(num_epochs):
  train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
  test_loss = _run_epoch(model, test_loader, criterion, device)

  print(f'epoch: {epoch+1}/{num_epochs}, train loss: {train_loss}, test loss: {test_loss}')

  # Keep only the weights with the lowest held-out loss seen so far.
  if test_loss < best_loss:
    best_loss = test_loss
    torch.save(model.state_dict(), 'best_model.pth')
    print('Model saved!')

## Visualizer

In [118]:
def _load_rgb_image(path, size):
  """Read `path` as a float32 RGB array scaled to [0, 1] and resized to `size`."""
  if not os.path.isfile(path):
    raise FileNotFoundError(f'File not found: {path}')
  image = cv2.imread(path)
  # cv2.imread signals an unreadable file by returning None.
  if image is None:
    raise ValueError(f'Could not decode image file: {path}')
  # imread yields a 3-channel BGR array, so BGR2RGB is the matching conversion.
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  image = cv2.resize(image, size)
  return image.astype(np.float32) / 255.0


def visualize_predictions(model, image_path, mask_image, device, image_size=(256, 256), threshold=0.5):
  """Plot an image, the predicted mask, the reference mask and an overlay.

  Args:
    model: segmentation model; it is switched to eval mode and called on a
      single-image batch.
    image_path: path of the input image file.
    mask_image: path of the reference mask file.
    device: device the input tensor is moved to before the forward pass.
    image_size: (width, height) both files are resized to. Defaults to
      (256, 256).
    threshold: cut-off applied to the sigmoid of the model output to obtain
      the binary predicted mask. Defaults to 0.5.

  Raises:
    FileNotFoundError: if either path does not point to an existing file.
    ValueError: if either file cannot be decoded as an image.
  """
  image = _load_rgb_image(image_path, image_size)
  reference_mask = _load_rgb_image(mask_image, image_size)

  # HWC -> CHW, then add the batch dimension the model expects.
  image_tensor = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0)
  image_tensor = image_tensor.to(device)

  model.eval()
  with torch.no_grad():
    logits = model(image_tensor)
  predicted_mask = torch.sigmoid(logits).cpu().squeeze().numpy()
  predicted_mask = (predicted_mask > threshold).astype(np.uint8)

  plt.figure(figsize=(10, 5))

  plt.subplot(1, 4, 1)
  plt.title('Image')
  plt.imshow(image)

  plt.subplot(1, 4, 2)
  plt.title('generated Mask')
  plt.imshow(predicted_mask)

  plt.subplot(1, 4, 3)
  plt.title('original Mask')
  plt.imshow(reference_mask)

  # Last panel: the predicted mask drawn semi-transparently over the image.
  plt.subplot(1, 4, 4)
  plt.title('overall visual')
  plt.imshow(image)
  plt.imshow(predicted_mask, cmap='jet', alpha=0.5)

  plt.show()

In [None]:
# Restore the best checkpoint written during training. map_location remaps the
# saved tensors onto the current device, so a checkpoint saved on a GPU also
# loads on a CPU-only runtime.
model.load_state_dict(torch.load('best_model.pth', map_location=device))

In [None]:
# The image and its mask share the same file name; build both paths from the
# directory constants instead of repeating the dataset location here.
image_name = 'cju0rx1idathl0835detmsp84.jpg'
sample_image = os.path.join(Image_path, image_name)
mask_image = os.path.join(Mask_path, image_name)
visualize_predictions(model, sample_image, mask_image, device)