In [None]:
!pip install kaggle
!mkdir -p /root/.config/kaggle
!sudo cp kaggle.json ~/.config/kaggle/
!sudo chmod 600 ~/.config/kaggle/kaggle.json

In [None]:
!pip install torch torchvision segmentation-models-pytorch albumentations opencv-python

In [None]:
!pip install tqdm

In [None]:
!kaggle datasets download -d tapakah68/segmentation-full-body-mads-dataset
!unzip segmentation-full-body-mads-dataset.zip

# UNet

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """Two 3x3 convolution stages, each followed by batch norm and ReLU.

    Layer order: Conv -> BN -> ReLU -> Dropout2d -> Conv -> BN -> ReLU.
    Both convolutions use ``padding=1``, so the spatial size of the input is
    preserved; only the channel count changes.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.
        dropout: Channel-dropout probability applied between the two stages.
            Defaults to 0.2, the value that was previously hard-coded.
    """
    def __init__(self, in_channels, out_channels, dropout=0.2):
        super().__init__()
        # The Dropout2d layer is always present (even for dropout=0) so the
        # Sequential indices, and therefore the state_dict keys, never shift.
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout2d(dropout),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply the block to ``x`` and return the resulting feature map."""
        return self.double_conv(x)

class UNet(nn.Module):
    """U-Net with four down-sampling stages, a bottleneck and four up-sampling stages.

    The encoder widens the channels 64 -> 128 -> 256 -> 512 (each stage followed
    by 2x2 max pooling), the bottleneck uses 1024 channels, and the decoder
    mirrors the encoder with transposed convolutions and skip connections.

    The network returns raw logits (no activation is applied after the final
    1x1 convolution).

    Input height/width do not have to be multiples of 16: whenever an
    up-sampled map is smaller than its skip connection (because max pooling
    floored an odd size), it is zero-padded to the skip connection's size
    before concatenation.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of output channels (logit maps).
    """
    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()

        # Encoder
        self.down1 = DoubleConv(in_channels, 64)
        self.pool1 = nn.MaxPool2d(2)

        self.down2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)

        self.down3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)

        self.down4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2)

        self.bottleneck = DoubleConv(512, 1024)

        # Decoder: each dec* block receives the up-sampled map concatenated
        # with the matching encoder output, hence the doubled input channels.
        self.up4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = DoubleConv(1024, 512)

        self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = DoubleConv(512, 256)

        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = DoubleConv(256, 128)

        self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = DoubleConv(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _match_size(x, ref):
        """Zero-pad ``x`` so its height/width equal those of ``ref``.

        Returns ``x`` untouched when the sizes already agree, which is always
        the case for inputs whose height and width are multiples of 16.
        """
        diff_h = ref.size(2) - x.size(2)
        diff_w = ref.size(3) - x.size(3)
        if diff_h == 0 and diff_w == 0:
            return x
        # F.pad takes the last dimension first: (left, right, top, bottom).
        return F.pad(x, [diff_w // 2, diff_w - diff_w // 2,
                         diff_h // 2, diff_h - diff_h // 2])

    def forward(self, x):
        """Return the logit map for the image batch ``x``."""
        d1 = self.down1(x)
        d2 = self.down2(self.pool1(d1))
        d3 = self.down3(self.pool2(d2))
        d4 = self.down4(self.pool3(d3))

        out = self.bottleneck(self.pool4(d4))

        # Walk back up, fusing each up-sampled map with its skip connection.
        stages = (
            (self.up4, self.dec4, d4),
            (self.up3, self.dec3, d3),
            (self.up2, self.dec2, d2),
            (self.up1, self.dec1, d1),
        )
        for upsample, decode, skip in stages:
            out = self._match_size(upsample(out), skip)
            out = decode(torch.cat([out, skip], dim=1))

        return self.final_conv(out)


In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm


# Dataset locations: both folders sit under the same root, whose path repeats
# the dataset folder name twice.
dataset_root = './segmentation_full_body_mads_dataset_1192_img/segmentation_full_body_mads_dataset_1192_img'
image_dir = dataset_root + '/images'
mask_dir = dataset_root + '/masks'

# -------- Dataset -------- #

class SegmentationDataset(Dataset):
    """Image / binary-mask pairs loaded from disk with OpenCV.

    Args:
        image_paths: Paths of the input images.
        mask_paths: Paths of the masks; ``mask_paths[i]`` belongs to
            ``image_paths[i]``.
        transforms: Optional callable invoked as
            ``transforms(image=..., mask=...)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries.

    Raises:
        ValueError: If the two path lists differ in length.
    """
    def __init__(self, image_paths, mask_paths, transforms=None):
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"Got {len(image_paths)} images but {len(mask_paths)} masks; "
                "every image needs exactly one mask."
            )
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transforms = transforms

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the ``(image, mask)`` pair at position ``idx``.

        The image is converted to RGB and the mask is binarised to float32
        values 0.0 / 1.0 (pixels above 127 count as foreground). With
        ``transforms`` set, the transformed image is returned and the
        transformed mask gains a leading channel dimension via
        ``unsqueeze(0)`` (so the transform is assumed to yield a tensor mask,
        e.g. by ending in ``ToTensorV2``). Without transforms the raw NumPy
        arrays are returned.

        Raises:
            FileNotFoundError: If the image or mask cannot be read.
        """
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)           # BGR image
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # Convert to RGB

        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        mask = (mask > 127).astype(np.float32)

        if self.transforms:
            augmented = self.transforms(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask'].unsqueeze(0)

        return image, mask

# -------- Transforms -------- #
# Training pipeline: resize to 256x256, random horizontal flip, normalisation
# with A.Normalize's defaults, then conversion to tensors.
train_transform = A.Compose(
    [
        A.Resize(height=256, width=256),
        A.HorizontalFlip(p=0.5),
        A.Normalize(),
        ToTensorV2(),
    ]
)

# Validation pipeline: same as training but without the random flip.
val_transform = A.Compose(
    [
        A.Resize(height=256, width=256),
        A.Normalize(),
        ToTensorV2(),
    ]
)

# -------- Data Split -------- #
image_files = sorted(os.path.join(image_dir, f) for f in os.listdir(image_dir))
mask_files = sorted(os.path.join(mask_dir, f) for f in os.listdir(mask_dir))

# Images and masks are paired purely by their position in the sorted listings,
# so a differing file count means every pair after the gap would be wrong.
if len(image_files) != len(mask_files):
    raise ValueError(
        f"Found {len(image_files)} images in {image_dir} but "
        f"{len(mask_files)} masks in {mask_dir}; cannot pair them."
    )

# Fixed random_state keeps the 80/20 split identical across runs.
train_imgs, val_imgs, train_masks, val_masks = train_test_split(
    image_files, mask_files, test_size=0.2, random_state=42
)

train_dataset = SegmentationDataset(train_imgs, train_masks, transforms=train_transform)
val_dataset = SegmentationDataset(val_imgs, val_masks, transforms=val_transform)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)


# -------- Model -------- #
# Pick the GPU when one is available and build the network directly on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)

# -------- Loss & Optimizer -------- #
# The model outputs raw logits, so the loss applies the sigmoid internally.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# -------- IoU Metric -------- #
def iou_score(preds, targets, threshold=0.5):
    """Mean intersection-over-union of a batch of binary masks.

    Args:
        preds: 4-D tensor of scores; values above ``threshold`` count as
            foreground. No sigmoid is applied here, so callers are expected to
            pass probabilities when using the default threshold.
        targets: 4-D tensor of ground-truth masks; non-zero means foreground.
        threshold: Cut-off applied to ``preds``.

    Returns:
        The IoU averaged over the first (batch) dimension, as a Python float.
        A small epsilon in numerator and denominator makes two empty masks
        score 1.0 instead of dividing by zero.
    """
    eps = 1e-6
    reduce_dims = (1, 2, 3)

    pred_fg = (preds > threshold).bool()
    true_fg = targets.bool()

    overlap = torch.logical_and(pred_fg, true_fg).sum(dim=reduce_dims)
    combined = torch.logical_or(pred_fg, true_fg).sum(dim=reduce_dims)

    per_sample_iou = (overlap + eps) / (combined + eps)
    return per_sample_iou.mean().item()


# -------- Training Loop -------- #
# Loss scaler for mixed-precision training; train_one_epoch reads it as a global.
scaler = GradScaler()

def train_one_epoch(model, loader, optimizer, criterion):
    """Train ``model`` for one pass over ``loader`` with mixed precision.

    Uses the notebook-level ``device`` and ``scaler``. The forward pass and the
    loss run under ``autocast``; the backward pass and optimizer step go
    through the gradient scaler.

    Returns:
        The sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    batch_losses = []
    for imgs, masks in tqdm(loader, desc="Training"):
        imgs = imgs.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)

        optimizer.zero_grad()
        with autocast():
            loss = criterion(model(imgs), masks)

        # Scale the loss before backprop, then let the scaler drive the step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_losses.append(loss.item())
    return sum(batch_losses) / len(loader)

def evaluate(model, loader):
    """Return the mean IoU of ``model`` over every sample in ``loader``.

    The model's logits are passed through a sigmoid once, then scored with
    ``iou_score`` at its default threshold. Each batch's mean IoU is weighted
    by the batch size, so a smaller final batch does not get the same weight
    as a full one and the result is a true per-sample average.

    Uses the notebook-level ``device``.

    Returns:
        The per-sample mean IoU as a float, or NaN when the loader yields no
        samples.
    """
    model.eval()
    iou_sum = 0.0
    n_samples = 0
    with torch.no_grad():
        for imgs, masks in loader:
            imgs, masks = imgs.to(device, non_blocking=True), masks.to(device, non_blocking=True)
            probs = torch.sigmoid(model(imgs))  # Apply sigmoid once here
            batch_size = imgs.size(0)
            # iou_score returns the batch mean; multiply back to a batch sum.
            iou_sum += iou_score(probs, masks) * batch_size
            n_samples += batch_size
    if n_samples == 0:
        return float("nan")
    return iou_sum / n_samples

# -------- Run Training -------- #
epochs = 25
# Per-epoch history of the training loss and the validation IoU.
train_losses, val_ious = [], []
for epoch in range(epochs):
    print("starting to train")
    # One optimisation pass over the training data, then a validation pass.
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion)
    val_iou = evaluate(model, val_loader)
    train_losses += [train_loss]
    val_ious += [val_iou]
    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val IoU: {val_iou:.4f}")


# U-Net with a pretrained ResNet-34 encoder

In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2


# -------- Model -------- #
# U-Net decoder on top of an ImageNet-pretrained ResNet-34 encoder.
# NOTE: this rebinds `model`, `criterion`, `optimizer`, `train_losses` and
# `val_ious`, replacing the objects created for the previous model.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
    activation=None  # We'll apply sigmoid manually
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# -------- Loss & Optimizer -------- #
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# train_one_epoch reads the notebook-level `scaler`. Start this run with a
# fresh one so the loss-scale state accumulated while training the previous
# model does not carry over. GradScaler comes from the same earlier cell that
# defines train_one_epoch, which this cell already depends on.
scaler = GradScaler()

# -------- Run Training -------- #
epochs = 20
train_losses = []
val_ious = []
for epoch in range(epochs):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion)
    val_iou = evaluate(model, val_loader)
    train_losses.append(train_loss)
    val_ious.append(val_iou)
    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Val IoU: {val_iou:.4f}")


# Zero-shot segmentation with LangSAM

In [None]:
!pip install -U git+https://github.com/luca-medeiros/lang-segment-anything.git

In [None]:
from PIL import Image
from lang_sam import LangSAM
import numpy as np
import os
import matplotlib.pyplot as plt
import kaggle
import cv2

# Text prompt naming the class to segment.
text_prompt = "person."

# Text-prompted segmenter used without any training on this dataset.
# NOTE: this rebinds `model`, replacing the network trained in the cells above.
model = LangSAM()

In [None]:
def intersection_over_union_metric(predicted_mask, true_mask):
    """Intersection-over-union of two masks.

    Both inputs are NumPy arrays that are cast to boolean (non-zero means
    foreground) before comparison.

    Returns:
        ``|pred AND true| / |pred OR true|``, or ``0.0`` when both masks are
        empty (the union is zero).
    """
    pred_fg = predicted_mask.astype(bool)
    true_fg = true_mask.astype(bool)

    union_area = (pred_fg | true_fg).sum()
    if union_area == 0:
        return 0.0

    overlap_area = (pred_fg & true_fg).sum()
    return overlap_area / union_area

In [None]:
def _scores_to_1d_array(values):
    """Return ``values`` as a flat float NumPy array (accepts 0-d scalars, lists and tensors)."""
    if hasattr(values, "detach"):  # torch tensor -> NumPy
        values = values.detach().cpu().numpy()
    return np.atleast_1d(np.asarray(values, dtype=float)).ravel()


def _best_mask_index(mask_scores):
    """Return the index of the mask with the highest mask score."""
    return int(np.argmax(_scores_to_1d_array(mask_scores)))


def evaluate_model_on_test_set(test_images_dir='./test_images',
                                test_masks_dir='./test_masks',
                                text_prompt="person.",
                                evaluation_function=intersection_over_union_metric
                                ):
    """Score the text-prompted segmenter against ground-truth masks.

    Every ``.png`` / ``.jpg`` / ``.jpeg`` file in ``test_images_dir`` is
    segmented with the notebook-level ``model`` (called as
    ``model.predict([image], [text_prompt])``; the first result's ``'masks'``
    and ``'mask_scores'`` entries are used). Of the returned masks, the one
    with the highest mask score is compared with the ground-truth mask of the
    same file name in ``test_masks_dir``.

    Args:
        test_images_dir: Directory containing the input images.
        test_masks_dir: Directory containing the ground-truth masks; a mask is
            assumed to have the same file name as its image.
        text_prompt: Prompt passed to the model for every image.
        evaluation_function: Callable ``(predicted_mask, true_mask) -> score``.

    Returns:
        A list of ``{'image': file_name, 'iou': score}`` dicts, in sorted
        file-name order. The score is NaN when the model returned no mask or
        an error occurred for that image. Images without a ground-truth mask
        are skipped. The list is empty when a directory is missing.
    """
    iou_results = []

    # Ensure the directories exist
    if not os.path.exists(test_images_dir):
        print(f"Test images directory not found: {test_images_dir}")
    elif not os.path.exists(test_masks_dir):
        print(f"Test masks directory not found: {test_masks_dir}")
    else:
        # Sorted for a reproducible order; extension match ignores case.
        image_files = sorted(
            f for f in os.listdir(test_images_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        for image_file in image_files:
            image_path = os.path.join(test_images_dir, image_file)
            # Assuming the mask file has the same name as the image file
            mask_path = os.path.join(test_masks_dir, image_file)

            if not os.path.exists(mask_path):
                print(f"Ground truth mask not found for {image_file}")
                continue

            try:
                # Context managers close the files once the pixels are loaded.
                with Image.open(image_path) as image_file_handle:
                    image_pil = image_file_handle.convert("RGB")

                # Ground truth: grayscale image where any non-zero pixel is foreground.
                with Image.open(mask_path) as mask_file_handle:
                    true_mask_np = np.array(mask_file_handle.convert("L")) > 0

                # LangSAM expects a list of images and a list of prompts
                result = model.predict([image_pil], [text_prompt])[0]
                masks = result['masks']

                if masks is None or len(masks) == 0:
                    print(f"No Prediction found for {image_file}")
                    iou_results.append({'image': image_file, 'iou': np.nan})
                    continue

                # Masks are indexed along the first axis; keep the best-scoring one.
                best_index = _best_mask_index(result['mask_scores'])
                predicted_mask_np = np.asarray(masks[best_index])
                result_iou = evaluation_function(predicted_mask_np, true_mask_np)

                iou_results.append({'image': image_file, 'iou': result_iou})
                print(f"Processed {image_file}, IoU: {result_iou:.4f}")
            except Exception as e:
                # Best effort: record the failure and carry on with the next image.
                print(f"Error processing {image_file}: {e}")
                iou_results.append({'image': image_file, 'iou': np.nan})  # Append NaN for errors

    # Print average IoU over the images that produced a valid score
    if len(iou_results) > 0:
        valid_ious = [res['iou'] for res in iou_results if not np.isnan(res['iou'])]
        if valid_ious:
            average_iou = np.mean(valid_ious)
            print(f"\nAverage IoU across test set: {average_iou:.4f}")
        else:
            print("No valid IoU results were computed.")
    else:
        print("No images were processed.")

    return iou_results


In [None]:
# The zero-shot model is evaluated on the complete dataset: these are the same
# image and mask folders used for training the networks above.
test_images_dir = './segmentation_full_body_mads_dataset_1192_img/segmentation_full_body_mads_dataset_1192_img/images'
test_masks_dir = './segmentation_full_body_mads_dataset_1192_img/segmentation_full_body_mads_dataset_1192_img/masks'
text_prompt = "person."

iou_results = evaluate_model_on_test_set(
    test_images_dir=test_images_dir,
    test_masks_dir=test_masks_dir,
    text_prompt=text_prompt,
    evaluation_function=intersection_over_union_metric,
)


In [None]:
# Rank only the images that have a valid score. NaN entries (no prediction or
# an error) never compare as smaller or larger than anything, so leaving them
# in would make the resulting sort order unreliable.
valid_iou_results = [res for res in iou_results if not np.isnan(res['iou'])]

# Sort by IoU in ascending order
sorted_iou_results = sorted(valid_iou_results, key=lambda x: x['iou'])

# Keep the 20 lowest-scoring images
lowest_iou_results = sorted_iou_results[:20]

print("Lowest IoU values and corresponding image names:")
for result in lowest_iou_results:
    print(f"Image: {result['image']}, IoU: {result['iou']:.4f}")

In [None]:
# Valid (non-NaN) scores only; failed images carry NaN and cannot be binned.
iou_scores = [res['iou'] for res in iou_results if not np.isnan(res['iou'])]

# Plot only when there is something to show; otherwise report it.
if len(iou_scores) > 0:
    plt.figure(figsize=(10, 6))
    plt.hist(iou_scores, bins=25, edgecolor='black')
    plt.title('Distribution of IoU Scores on Test Set')
    plt.xlabel('IoU Score')
    plt.ylabel('Frequency')
    plt.grid(axis='y', alpha=0.75)
    plt.show()
else:
    print("No valid IoU scores available to plot the histogram.")

In [None]:
# Report the mean IoU over all images that produced a valid (non-NaN) score.
if not iou_results:
    print("No images were processed.")
else:
    # NaN marks images with no prediction or an error; leave them out of the mean.
    valid_ious = [entry['iou'] for entry in iou_results if not np.isnan(entry['iou'])]
    if not valid_ious:
        print("No valid IoU results were computed.")
    else:
        average_iou = np.mean(valid_ious)
        print(f"\nAverage IoU across test set: {average_iou:.4f}")
