In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datasets import Dataset
from torch.utils.data import DataLoader
from PIL import Image
from tqdm import tqdm
from transformers import SamModel, SamConfig, SamProcessor
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Paths to the folders containing the patches and labels
data_dir = "/data3/animesh/vscode/Kaggle/dataset"
patches_dir = os.path.join(data_dir, "training_patches")
labels_dir = os.path.join(data_dir, "training_noisy_labels")

# Function to load all images in a directory
def load_images_from_folder(folder):
    images = []
    filenames = os.listdir(folder)
    for filename in filenames:
        if filename.endswith(".png"):
            img_path = os.path.join(folder, filename)
            img = Image.open(img_path)#.convert('RGB')  # Converting to RGB for consistency
            img = np.array(img)  # Convert to numpy array
            images.append(img)
    return images, filenames

# Load training patches and noisy labels
patches, patch_filenames = load_images_from_folder(patches_dir)
noisy_labels, label_filenames = load_images_from_folder(labels_dir)

In [3]:
for i in range(5):
    print(f"{patch_filenames[i]} ||||| {label_filenames[i]}")

1_11_86_0_836.png ||||| 1_11_86_0_836.png
1_12_78_836_1044.png ||||| 1_12_78_836_1044.png
0_31_59_418_627.png ||||| 0_31_59_418_627.png
1_19_79_627_836.png ||||| 1_19_79_627_836.png
1_7_95_0_836.png ||||| 1_7_95_0_836.png


In [4]:
images = np.array(patches)
masks = np.array(noisy_labels)

In [5]:
# Create a list to store the indices of non-empty masks
valid_indices = [i for i, mask in enumerate(masks) if mask.max() != 0]
# Filter the image and mask arrays to keep only the non-empty pairs
filtered_images = images[valid_indices]
filtered_masks = masks[valid_indices]
filtered_patch_filenames = [patch_filenames[i] for i in valid_indices]
filtered_label_filenames = [label_filenames[i] for i in valid_indices]

In [6]:
print("Image shape:", filtered_images.shape)  # e.g., (num_frames, height, width, num_channels)
print("Mask shape:", filtered_masks.shape)
print(len(filtered_patch_filenames))
print(len(filtered_label_filenames))

Image shape: (4836, 256, 256, 3)
Mask shape: (4836, 256, 256)
4836
4836


In [7]:
# Convert the NumPy arrays to Pillow images and store them in a dictionary
dataset_dict = {
    "image": [Image.fromarray(img) for img in filtered_images],
    "label": [Image.fromarray(mask) for mask in filtered_masks],
}

# Create the dataset using the datasets.Dataset class
dataset = Dataset.from_dict(dataset_dict)

In [8]:
dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 4836
})

# Score Generation

In [5]:
# Load the model configuration
model_config = SamConfig.from_pretrained("facebook/sam-vit-base")
processor = SamProcessor.from_pretrained("facebook/sam-vit-base")

# Create an instance of the model architecture with the loaded configuration
my_mito_model = SamModel(config=model_config)
#Update the model by loading the weights from saved file.
my_mito_model.load_state_dict(torch.load("/data3/animesh/vscode/Kaggle/customized_SAM_epoch_9.pth"))

  my_mito_model.load_state_dict(torch.load("/data3/animesh/vscode/Kaggle/customized_SAM_epoch_9.pth"))


<All keys matched successfully>

In [6]:
# set the device to cuda if available, otherwise use cpu
device = "cuda" if torch.cuda.is_available() else "cpu"
my_mito_model.to(device)

SamModel(
  (shared_image_embedding): SamPositionalEmbedding()
  (vision_encoder): SamVisionEncoder(
    (patch_embed): SamPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (layers): ModuleList(
      (0-11): 12 x SamVisionLayer(
        (layer_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): SamVisionAttention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (layer_norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): SamMLPBlock(
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): Linear(in_features=3072, out_features=768, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (neck): SamVisionNeck(
      (conv1): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (layer_norm1): SamLayerNorm()
     

In [7]:
def get_bounding_box(ground_truth_map):
  # get bounding box from mask
  y_indices, x_indices = np.where(ground_truth_map > 0)
  x_min, x_max = np.min(x_indices), np.max(x_indices)
  y_min, y_max = np.min(y_indices), np.max(y_indices)
  # add perturbation to bounding box coordinates
  H, W = ground_truth_map.shape
  x_min = max(0, x_min - np.random.randint(0, 20))
  x_max = min(W, x_max + np.random.randint(0, 20))
  y_min = max(0, y_min - np.random.randint(0, 20))
  y_max = min(H, y_max + np.random.randint(0, 20))
  bbox = [x_min, y_min, x_max, y_max]

  return bbox

In [10]:
prediction = []
for idx in tqdm(range(len(dataset))):
    test_image = dataset[idx]["image"]
    # get box prompt based on ground truth segmentation map
    ground_truth_mask = np.array(dataset[idx]["label"])
    prompt = get_bounding_box(ground_truth_mask)

    # prepare image + box prompt for the model
    inputs = processor(test_image, input_boxes=[[prompt]], return_tensors="pt")

    # Move the input tensor to the GPU if it's not already there
    inputs = {k: v.to(device) for k, v in inputs.items()}

    my_mito_model.eval()

    # forward pass
    with torch.no_grad():
        outputs = my_mito_model(**inputs, multimask_output=False)

    # apply sigmoid
    medsam_seg_prob = torch.sigmoid(outputs.pred_masks.squeeze(1))
    # medsam_seg_prob = outputs.pred_masks.squeeze(1)
    # convert soft mask to hard mask
    medsam_seg_prob = medsam_seg_prob.cpu().numpy().squeeze()
    medsam_seg = (medsam_seg_prob > 0.5).astype(np.uint8)
    prediction.append(medsam_seg )

  0%|          | 7/5000 [00:25<5:06:23,  3.68s/it]


ValueError: zero-size array to reduction operation minimum which has no identity

In [31]:
def binaryMaskIOU(mask1, mask2):   # From the question.
    mask1_area = np.count_nonzero(mask1 == 1)
    mask2_area = np.count_nonzero(mask2 == 1)
    intersection = np.count_nonzero(np.logical_and( mask1==1,  mask2==1 ))
    iou = intersection/(mask1_area+mask2_area-intersection)
    return iou

In [32]:
IOU_bucket = []
for idx in tqdm(range(len(prediction))):
    mask1=prediction[idx]
    mask2=filtered_masks[idx]
    IOU= binaryMaskIOU(mask1, mask2)
    IOU_bucket.append(IOU)

100%|██████████| 4836/4836 [00:00<00:00, 34980.44it/s]


In [34]:
df = pd.DataFrame({
    'imageid': filtered_patch_filenames,
    'iou_score': IOU_bucket
})
df.head()

Unnamed: 0,imageid,iou_score
0,1_11_86_0_836.png,0.360032
1,1_12_78_836_1044.png,0.433616
2,0_31_59_418_627.png,0.479887
3,1_19_79_627_836.png,0.736264
4,1_7_95_0_836.png,0.66407


In [35]:
df.to_csv('IOU_epoch_10_filtered.csv')

In [36]:
# Sort the DataFrame by IoU score (ascending order)
df_sorted = df.sort_values(by='iou_score', ascending=False)

# Print the sorted DataFrame
print(df_sorted.head())

                   imageid  iou_score
4672   1_18_71_627_418.png   0.954265
2203  0_21_19_836_1044.png   0.948350
2026    2_26_34_0_1044.png   0.948333
6      1_21_76_836_627.png   0.946957
1316    1_8_85_627_836.png   0.944241
