# Environment and MedSAM setup and preparation

In [2]:
# Set up the Colab runtime (with GPU) and mount Google Drive so the files stored
# under /content/drive can be accessed by the rest of the notebook.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
# Clone the github repository
!git clone https://github.com/bowang-lab/MedSAM.git
%cd MedSAM


Cloning into 'MedSAM'...
remote: Enumerating objects: 967, done.[K
remote: Counting objects: 100% (354/354), done.[K
remote: Compressing objects: 100% (117/117), done.[K
remote: Total 967 (delta 286), reused 237 (delta 237), pack-reused 613 (from 2)[K
Receiving objects: 100% (967/967), 62.89 MiB | 14.46 MiB/s, done.
Resolving deltas: 100% (479/479), done.
/content/MedSAM


In [7]:
# Install MedSAM as an editable package, following the README from GitHub.
# Must be run from the repository root (the previous cell changes into it).
!pip install -e .


Obtaining file:///content/MedSAM
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting SimpleITK>=2.2.1 (from medsam==0.0.1)
  Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.2 kB)
Collecting ipympl (from medsam==0.0.1)
  Downloading ipympl-0.9.7-py3-none-any.whl.metadata (8.7 kB)
Collecting jupyterlab (from medsam==0.0.1)
  Downloading jupyterlab-4.4.6-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->medsam==0.0.1)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab->medsam==0.0.1)
  Downloading jupyter_lsp-2.2.6-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab->medsam==0.0.1)
  Downloading jupyter_server-2.16.0-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab->medsam==0.0.1)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collect

In [9]:
# Check that the pretrained checkpoint exists at the expected Drive location
# before trying to copy it in the next cell.
import os

checkpoint_path = '/content/drive/MyDrive/MedSAM/work_dir/MedSAM/medsam_vit_b.pth'
print("Checkpoint exists:", os.path.exists(checkpoint_path))


Checkpoint exists: True


In [10]:
# Copy the pretrained checkpoint from Drive into the MedSAM directory.
# The destination is relative to the current working directory (the cloned repo).
!mkdir -p work_dir/MedSAM
!cp /content/drive/MyDrive/MedSAM/work_dir/MedSAM/medsam_vit_b.pth work_dir/MedSAM/


In [11]:
# Have to convert the jpg to npy since that is the input expected by MedSAM - starting with a small sample size of images to test and make sure model works (15 images)
import os
import cv2
import numpy as np
from tqdm import tqdm #to see progress

def convert_jpg_to_npy(image_dir, mask_dir, output_img_dir, output_mask_dir):
    """Convert paired .jpg images and masks into .npy arrays.

    For every ``.jpg`` file in ``image_dir`` the image is read with OpenCV,
    converted from BGR to RGB, resized to 1024x1024 (bicubic) and stored as
    float32 divided by 255. The mask with the same file name in ``mask_dir``
    is read as grayscale, resized to 256x256 (nearest neighbour) and stored as
    uint8 with 1 where the mask is non-zero and 0 elsewhere. Both arrays are
    saved under the original base name with a ``.npy`` extension.

    Pairs whose image or mask cannot be read are skipped (and reported), and
    nothing is written for them, so the two output folders always stay paired.

    Args:
        image_dir: folder containing the source .jpg images.
        mask_dir: folder containing the masks, named like their images.
        output_img_dir: folder that receives the image arrays (created if needed).
        output_mask_dir: folder that receives the mask arrays (created if needed).
    """
    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    image_names = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
    skipped = 0

    for name in tqdm(image_names):
        # Read both files before writing anything: cv2.imread returns None for a
        # missing or unreadable file instead of raising.
        img = cv2.imread(os.path.join(image_dir, name))
        mask = cv2.imread(os.path.join(mask_dir, name), cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            print(f" Could not read image or mask for {name}, skipping.")
            skipped += 1
            continue

        # splitext swaps only the real extension, unlike str.replace.
        npy_name = os.path.splitext(name)[0] + '.npy'

        # Image: RGB, 1024x1024, float32 divided by 255
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img, (1024, 1024), interpolation=cv2.INTER_CUBIC)
        img_norm = img_resized.astype(np.float32) / 255.0
        np.save(os.path.join(output_img_dir, npy_name), img_norm)

        # Mask: 256x256, binarized to 0/1 (nearest neighbour keeps it binary)
        mask_resized = cv2.resize(mask, (256, 256), interpolation=cv2.INTER_NEAREST)
        mask_bin = (mask_resized > 0).astype(np.uint8)
        np.save(os.path.join(output_mask_dir, npy_name), mask_bin)

    if skipped:
        print(f" Skipped {skipped} pair(s) that could not be read")
    print(" Finished converting images and masks to .npy format")

# Run the conversion on the small test sample. The image and mask folders must
# already exist at these Drive paths (see the file placement described in the
# MRP GitHub); the .npy output is written to the local Colab disk.
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/ctrus_sample/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/ctrus_sample/masks',
    output_img_dir='/content/MedSAM/data/npy_sample/imgs',
    output_mask_dir='/content/MedSAM/data/npy_sample/gts'
)



100%|██████████| 15/15 [00:26<00:00,  1.79s/it]

 Finished converting images and masks to .npy format





In [4]:
# Install MONAI, which is needed to run training with MedSAM
!pip install monai


Collecting monai
  Downloading monai-1.5.0-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<2.7.0,

In [12]:
# Smoke test: fine-tune on the sample image-mask pairs with one GPU (5 epochs,
# batch size 1) to check the model works before moving on to the full dataset.
# The relative checkpoint path assumes the working directory is /content/MedSAM.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/MedSAM/data/npy_sample \
  -checkpoint work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 5 \
  -batch_size 1


2025-08-17 20:28:03.696814: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755462483.926370    6248 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755462483.990920    6248 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755462484.467162    6248 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755462484.467200    6248 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755462484.467204    6248 computation_placer.cc:177] computation placer alr

Only need to run the following cell if it is the first time creating a dir for the training group (with small sample size for testing)


In [13]:
### Build the first experiment-specific training group: a small sample of
### high-quality images, used to test the pipeline before running on the
### entire dataset.

import os
import pandas as pd
from shutil import copyfile

# Source locations on Drive
csv_path = '/content/drive/MyDrive/MedSAM/data/ctrus/c-trus.filtered.csv'
image_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/images'
mask_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/masks'

# Destination on Drive, so the sample persists across runtimes
output_image_dir = '/content/drive/MyDrive/MedSAM/data/high_quality_sample/images'
output_mask_dir = '/content/drive/MyDrive/MedSAM/data/high_quality_sample/masks'

for folder in (output_image_dir, output_mask_dir):
    os.makedirs(folder, exist_ok=True)

# Metadata of the filtered dataset
df = pd.read_csv(csv_path)

# Rows flagged as high quality (quality == 0)
hq_df = df[df['quality'] == 0]
print(f" Found {len(hq_df)} high-quality images")

# Keep the first 15 only, for a balanced comparison with the earlier sample
hq_df = hq_df.head(15)

# Copy every pair whose image and mask are both present in the source folders
for fname in hq_df['file']:
    src_img = os.path.join(image_dir, fname)
    src_mask = os.path.join(mask_dir, fname)

    if not (os.path.exists(src_img) and os.path.exists(src_mask)):
        print(f"Skipped {fname} (missing)")
        continue

    copyfile(src_img, os.path.join(output_image_dir, fname))
    copyfile(src_mask, os.path.join(output_mask_dir, fname))
    print(f"Copied {fname}")

print("Done copying high-quality samples to Google Drive.")



 Found 170 high-quality images
Copied E26MT6C.jpg
Copied AQY3DE2.jpg
Copied 0311XXS.jpg
Copied XIHLEX6.jpg
Copied ZMMN2NR.jpg
Copied VIN02HN.jpg
Copied 0MVW1GD.jpg
Copied ESN816N.jpg
Copied 4PK5KEG.jpg
Copied H29LQ2S.jpg
Copied MIQ5791.jpg
Copied HXCL6TN.jpg
Copied BDI4WIR.jpg
Copied P94IESQ.jpg
Copied Z41418X.jpg
Done copying high-quality samples to Google Drive.


In [14]:
# Reuse the conversion function defined earlier to turn the new high-quality
# sample from .jpg into .npy (output goes to the local Colab disk).
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/high_quality_sample/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/high_quality_sample/masks',
    output_img_dir='/content/MedSAM/data/npy_high/imgs',
    output_mask_dir='/content/MedSAM/data/npy_high/gts'
)


100%|██████████| 15/15 [00:00<00:00, 17.46it/s]

 Finished converting images and masks to .npy format





In [16]:
# Fine-tune MedSAM on the new high-quality sample group (5 epochs, batch size 1).
# The relative checkpoint path assumes the working directory is /content/MedSAM.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/MedSAM/data/npy_high \
  -checkpoint work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 5 \
  -batch_size 1


2025-08-17 20:32:51.193266: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755462771.214877    7583 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755462771.221195    7583 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755462771.237370    7583 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755462771.237395    7583 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755462771.237399    7583 computation_placer.cc:177] computation placer alr

In [17]:
# Once the sample above works with the rest of the code, use the whole dataset
# to build the final high-quality training group.
import os
import pandas as pd
from shutil import copyfile

# Paths
csv_path = '/content/drive/MyDrive/MedSAM/data/ctrus/c-trus.filtered.csv'
image_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/images'
mask_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/masks'

# Persistent output location on Google Drive
output_image_dir = '/content/drive/MyDrive/MedSAM/data/high_quality_full/images'
output_mask_dir = '/content/drive/MyDrive/MedSAM/data/high_quality_full/masks'

# Create output folders
os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_mask_dir, exist_ok=True)

# Load filtered CSV
df = pd.read_csv(csv_path)

# Getting all high-quality images
# NOTE(review): this cell selects on `quality == 0` while the low-quality cells
# select on `quality_name == 'low'` — confirm both columns encode the same label.
hq_df = df[df['quality'] == 0]
print(f"Found {len(hq_df)} high-quality images")

# Copy files. Image and mask destinations are checked separately so that a pair
# left half-copied by an interrupted run is completed instead of being skipped.
copied, skipped = 0, 0
for fname in hq_df['file']:
    src_img = os.path.join(image_dir, fname)
    src_mask = os.path.join(mask_dir, fname)
    dst_img = os.path.join(output_image_dir, fname)
    dst_mask = os.path.join(output_mask_dir, fname)

    if not (os.path.exists(src_img) and os.path.exists(src_mask)):
        print(f" Missing {fname}")
        continue

    pending = [(src, dst)
               for src, dst in ((src_img, dst_img), (src_mask, dst_mask))
               if not os.path.exists(dst)]
    for src, dst in pending:
        copyfile(src, dst)

    if pending:
        copied += 1
    else:
        skipped += 1

print(f" Done. Copied: {copied}, Skipped (already exist): {skipped}")


Found 170 high-quality images
 Done. Copied: 0, Skipped (already exist): 170


In [18]:
# Same conversion, now for the full high-quality set. The .npy output is written
# to Drive so it persists across runtimes.
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/high_quality_full/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/high_quality_full/masks',
    output_img_dir = '/content/drive/MyDrive/MedSAM/data/npy_high_full/imgs',
    output_mask_dir = '/content/drive/MyDrive/MedSAM/data/npy_high_full/gts'
)

100%|██████████| 170/170 [07:54<00:00,  2.79s/it]

 Finished converting images and masks to .npy format





In [None]:
# Since the training script saves the trained path locally I need to save it to the drive so that it doesnt need to be rerun every time the runtime is reset
!mkdir -p /content/drive/MyDrive/MedSAM/work_dir/MedSAM

!mkdir -p /content/work_dir  # ensure parent directory exists
!ln -s /content/drive/MyDrive/MedSAM/work_dir/MedSAM /content/work_dir/MedSAM




In [None]:
# No need to run this every time if it has been run once and the best model is
# already saved to Drive.
# Train on the full high-quality set (20 epochs, batch size 1), using the same
# script; the trained model checkpoint is saved because the experiments use it.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/drive/MyDrive/MedSAM/data/npy_high_full \
  -checkpoint work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 20 \
  -batch_size 1


2025-07-26 18:27:39.233006: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753554459.253808   26225 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753554459.260003   26225 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-26 18:27:39.281108: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
✅ Number of image-mask pairs: 170
100% 170/170 [03:41<00:00,  1.30s/it]
Epoch 0: Loss = 0.4533
Epoch 0 Metrics -> Dice: 0.607

# Experiment 1: Filter Low-Quality Samples

In [None]:
import pandas as pd

# Read the dataset metadata
csv_path = "/content/drive/MyDrive/MedSAM/data/ctrus/c-trus.filtered.csv"
df = pd.read_csv(csv_path)

# Keep the rows labelled 'low' in quality_name and collect their file names
is_low = df['quality_name'] == 'low'
df_low = df[is_low]
low_quality_filenames = df_low['file'].tolist()

print(" Number of low-quality images found:", len(low_quality_filenames))
print("Sample filenames:", low_quality_filenames[:5])


✅ Number of low-quality images found: 93
🖼️ Sample filenames: ['D1K2TQS.jpg', 'KSXTAWQ.jpg', 'R9Z0ECR.jpg', '62GWY4Y.jpg', '6LNEH6Y.jpg']


In [None]:
# Copy the low-quality image/mask pairs into their own folder on Drive
import os
import pandas as pd
from shutil import copyfile

# Define paths
csv_path = '/content/drive/MyDrive/MedSAM/data/ctrus/c-trus.filtered.csv'
image_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/images'
mask_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/masks'

# Save to Google Drive (so it is persistent)
output_image_dir = '/content/drive/MyDrive/MedSAM/data/low_quality_sample/images'
output_mask_dir = '/content/drive/MyDrive/MedSAM/data/low_quality_sample/masks'

# Create output folders if they don't exist
os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_mask_dir, exist_ok=True)

# Load CSV and filter low-quality
df = pd.read_csv(csv_path)
low_df = df[df['quality_name'] == 'low']
print(f" Found {len(low_df)} low-quality images")

# Copy whatever is still missing at the destination. Image and mask are checked
# separately: the previous `not exists(img) and not exists(mask)` test skipped a
# pair when only one of its two files had been copied, leaving it incomplete.
copied = 0
skipped = 0
for fname in low_df['file']:
    src_img = os.path.join(image_dir, fname)
    src_mask = os.path.join(mask_dir, fname)
    dst_img = os.path.join(output_image_dir, fname)
    dst_mask = os.path.join(output_mask_dir, fname)

    if not (os.path.exists(src_img) and os.path.exists(src_mask)):
        print(f" Missing: {fname}")
        continue

    pending = [(src, dst)
               for src, dst in ((src_img, dst_img), (src_mask, dst_mask))
               if not os.path.exists(dst)]
    for src, dst in pending:
        copyfile(src, dst)

    if pending:
        copied += 1
    else:
        skipped += 1

print(f" Done. Copied: {copied}, Skipped (already exist): {skipped}")


🔍 Found 93 low-quality images
✅ Done. Copied: 0, Skipped (already exist): 93


# function to convert to .npy (with debugging help)


In [None]:
# Convert to .npy format for MedSAM input (using same function as before but with some safety checks for debugging)
import os
import numpy as np
import cv2
from tqdm import tqdm

def convert_jpg_to_npy(image_dir, mask_dir, output_img_dir, output_mask_dir):
    """Convert paired .jpg images and masks into .npy arrays, with safety checks.

    For every ``.jpg`` file in ``image_dir`` the image is read with OpenCV,
    converted from BGR to RGB, resized to 1024x1024 (bicubic) and stored as
    float32 divided by 255. The mask with the same file name in ``mask_dir``
    is read as grayscale, resized to 256x256 (nearest neighbour) and stored as
    uint8 with 1 where the mask is non-zero and 0 elsewhere. Both arrays are
    saved under the original base name with a ``.npy`` extension.

    A pair is skipped, with a message, when its mask file is missing or when
    either file cannot be read; nothing is written for a skipped pair.

    Args:
        image_dir: folder containing the source .jpg images.
        mask_dir: folder containing the masks, named like their images.
        output_img_dir: folder that receives the image arrays (created if needed).
        output_mask_dir: folder that receives the mask arrays (created if needed).
    """
    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    image_names = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

    print(f" Found {len(image_names)} image–mask pairs to convert...")

    for name in tqdm(image_names):
        img_path = os.path.join(image_dir, name)
        mask_path = os.path.join(mask_dir, name)

        if not os.path.exists(mask_path):
            print(f" Mask missing for {name}, skipping.")
            continue

        # cv2.imread returns None (it does not raise) for unreadable files, so
        # check both results before writing anything.
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            print(f" Could not read image or mask for {name}, skipping.")
            continue

        # splitext swaps only the real extension, unlike str.replace.
        npy_name = os.path.splitext(name)[0] + '.npy'

        # Image: RGB, 1024x1024, float32 divided by 255
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img, (1024, 1024), interpolation=cv2.INTER_CUBIC)
        img_norm = img_resized.astype(np.float32) / 255.0
        np.save(os.path.join(output_img_dir, npy_name), img_norm)

        # Mask: 256x256, binarized to 0/1
        mask_resized = cv2.resize(mask, (256, 256), interpolation=cv2.INTER_NEAREST)
        mask_bin = (mask_resized > 0).astype(np.uint8)
        np.save(os.path.join(output_mask_dir, npy_name), mask_bin)

    print(" Finished converting images and masks to .npy format")



In [None]:
# Convert the low-quality images and masks to .npy on the local Colab disk.
# NOTE(review): the output sub-folders here are 'images'/'masks', whereas the
# other conversions write 'imgs'/'gts' — confirm the inference script expects this.
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/low_quality_sample/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/low_quality_sample/masks',
    output_img_dir='/content/MedSAM/data/npy_low/images',
    output_mask_dir='/content/MedSAM/data/npy_low/masks'
)


🔍 Found 93 image–mask pairs to convert...


100%|██████████| 93/93 [02:41<00:00,  1.74s/it]

✅ Finished converting images and masks to .npy format





In [None]:
# Also write the converted low-quality set to Drive so it can be accessed later
# without rerunning. The 'imgs'/'gts' folder names match the other training
# sets, for when training is done with the low-quality samples.
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/low_quality_sample/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/low_quality_sample/masks',
    output_img_dir='/content/drive/MyDrive/MedSAM/data/npy_low_full/imgs',
    output_mask_dir='/content/drive/MyDrive/MedSAM/data/npy_low_full/gts'
)

🔍 Found 93 image–mask pairs to convert...


100%|██████████| 93/93 [00:10<00:00,  9.09it/s]

✅ Finished converting images and masks to .npy format





In [None]:
# First experiment: a dedicated script loads the model fine-tuned on the
# high-quality images and runs inference on the low-quality images to test
# MedSAM's performance on them.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/MedSAM/data/npy_low \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250726-1827/medsam_model_best.pth \
  --device cuda


✅ Found 93 image–mask pairs.
100% 93/93 [00:41<00:00,  2.22it/s]
📊 Evaluation Results: {'dice': 0.5104330785081832, 'iou': 0.36581054309342975, 'precision': 0.591249413668148, 'recall': 0.5157915496352499}


# Training for MedSAM on low quality images - needed for comparative analysis between both image quality groups (same process as high quality group training)


In [None]:
# Same training file but changing the input to low-quality images
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/drive/MyDrive/MedSAM/data/npy_low_split/train \
  -checkpoint work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 20 \
  -batch_size 1

2025-07-27 16:55:52.440679: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753635352.461777   15577 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753635352.468565   15577 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-27 16:55:52.490415: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
✅ Number of image-mask pairs: 93
100% 93/93 [01:50<00:00,  1.18s/it]
Epoch 0: Loss = 0.7520
Epoch 0 Metrics -> Dice: 

In [None]:
# Run the experiment again, this time with the model trained on the low-quality
# images. The evaluation data are the same low-quality images that model was
# trained on, so these scores are not a held-out estimate.
# NOTE(review): this cell calls inference_low_quality_eval.py while the other
# cells call inference_low_quality_eval_per.py — confirm which script is intended.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval.py \
  -data_path /content/MedSAM/data/npy_low \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250727-1655/medsam_model_best.pth \
  --device cuda


✅ Found 93 image–mask pairs.
100% 93/93 [00:40<00:00,  2.28it/s]
📊 Evaluation Results: {'dice': 0.9345915804627121, 'iou': 0.8777359031861828, 'precision': 0.9352748208148505, 'recall': 0.9345902742878083}


# Converting the full dataset as well, so it can be tested alongside the low-quality images

In [None]:
# Keep only valid image-mask pairs. This repeats the process that produced the
# filtered CSV used earlier, as a double check, but without relying on the mask
# area recorded there.
import os
import cv2
from shutil import copyfile
from tqdm import tqdm

# Original dataset
image_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/images'
mask_dir = '/content/drive/MyDrive/MedSAM/data/ctrus/masks'

# Destination for the valid pairs
output_image_dir = '/content/drive/MyDrive/MedSAM/data/filtered_full/images'
output_mask_dir = '/content/drive/MyDrive/MedSAM/data/filtered_full/masks'

for folder in (output_image_dir, output_mask_dir):
    os.makedirs(folder, exist_ok=True)

# Candidate images, in sorted order
img_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
valid_count = 0

print(" Filtering for non-empty masks...")
for fname in tqdm(img_files):
    src_img = os.path.join(image_dir, fname)
    src_mask = os.path.join(mask_dir, fname)

    # No mask file at all: not a pair
    if not os.path.exists(src_mask):
        continue

    # Unreadable or all-black masks are rejected
    mask = cv2.imread(src_mask, cv2.IMREAD_GRAYSCALE)
    if mask is None or cv2.countNonZero(mask) == 0:
        continue

    # Valid pair: copy both files under the same name into the filtered folder
    copyfile(src_img, os.path.join(output_image_dir, fname))
    copyfile(src_mask, os.path.join(output_mask_dir, fname))
    valid_count += 1

print(f" Done. Copied {valid_count} valid image–mask pairs to 'filtered_full'.")


🔍 Filtering for non-empty masks...


100%|██████████| 827/827 [00:25<00:00, 32.79it/s]

✅ Done. Copied 508 valid image–mask pairs to 'filtered_full'.





In [None]:
# The filtered full dataset also needs converting to .npy (output saved to Drive)
convert_jpg_to_npy(
    image_dir='/content/drive/MyDrive/MedSAM/data/filtered_full/images',
    mask_dir='/content/drive/MyDrive/MedSAM/data/filtered_full/masks',
    output_img_dir='/content/drive/MyDrive/MedSAM/data/npy_filtered_full/imgs',
    output_mask_dir='/content/drive/MyDrive/MedSAM/data/npy_filtered_full/gts'
)


100%|██████████| 508/508 [00:58<00:00,  8.62it/s]

✅ Finished converting images and masks to .npy format





When conducting the experiments, I realized the need for a separate, held-out set of images: otherwise the model trained on the low-quality images would be evaluated on the same images it was trained on rather than on unseen ones. I have therefore split the low-quality images into two groups, one used for training and the other for testing.


In [19]:
# Create the 70/30 train/test split of the low-quality .npy pairs
import os
import random
import shutil

# Source folders, and the folders the train and test splits are written to
src_img_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_full/imgs'
src_mask_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_full/gts'

train_img_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_split/train/imgs'
train_mask_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_split/train/gts'
test_img_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_split/test/imgs'
test_mask_dir = '/content/drive/MyDrive/MedSAM/data/npy_low_split/test/gts'

# Creating output folders to match
for folder in (train_img_dir, train_mask_dir, test_img_dir, test_mask_dir):
    os.makedirs(folder, exist_ok=True)


def list_npy(folder):
    """Return the .npy file names in `folder`, sorted for a deterministic order."""
    return sorted(f for f in os.listdir(folder) if f.endswith('.npy'))


def copy_split(files, img_dst, mask_dst):
    """Copy each image/mask pair in `files` into the split folders.

    Files already present at the destination are left untouched, so the cell
    can be re-run safely.
    """
    for fname in files:
        for src_dir, dst_dir in ((src_img_dir, img_dst), (src_mask_dir, mask_dst)):
            dst = os.path.join(dst_dir, fname)
            if not os.path.exists(dst):
                shutil.copy(os.path.join(src_dir, fname), dst)


train_files = list_npy(train_img_dir)
test_files = list_npy(test_img_dir)

if train_files or test_files:
    # A split already exists on Drive. Re-shuffling could assign files
    # differently and leave the same image in both train and test, so keep it.
    all_files = train_files + test_files
    print(" Existing split found in 'npy_low_split' - reusing it (delete the folder to re-split).")
else:
    # os.listdir() returns names in arbitrary order; sorting first is what makes
    # the seeded shuffle give the same split on every machine.
    all_files = list_npy(src_img_dir)
    random.seed(42)
    random.shuffle(all_files)

    # 70/30 split
    split_idx = int(len(all_files) * 0.7)
    train_files = all_files[:split_idx]
    test_files = all_files[split_idx:]

# A file in both sets would leak test data into training.
assert not set(train_files) & set(test_files), \
    "Train and test splits overlap - delete 'npy_low_split' and re-run this cell"

print(f" Splitting {len(all_files)} files into:")
print(f" Train: {len(train_files)}")
print(f" Test:  {len(test_files)}")

# copy to train/test
copy_split(train_files, train_img_dir, train_mask_dir)
copy_split(test_files, test_img_dir, test_mask_dir)

print(" Split complete. Train/test sets saved in 'npy_low_split'.")



 Splitting 93 files into:
 Train: 65
 Test:  28
 Split complete. Train/test sets saved in 'npy_low_split'.


In [None]:
# Train again, this time on the train split of the low-quality images only
# (20 epochs, batch size 1), starting from the pretrained checkpoint on Drive.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/drive/MyDrive/MedSAM/data/npy_low_split/train \
  -checkpoint /content/drive/MyDrive/MedSAM/work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 20 \
  -batch_size 1


2025-07-28 19:52:05.780980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753732326.065639   16824 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753732326.153330   16824 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-28 19:52:06.799992: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
✅ Number of image-mask pairs: 65
100% 65/65 [01:17<00:00,  1.19s/it]
Epoch 0: Loss = 0.8594
Epoch 0 Metrics -> Dice: 0.1885, 

# Experiment 1 - Tests

The models are tested on test split of low-quality images

In [7]:
# Experiment 1: evaluate the MedSAM model trained on high-quality images on the
# low-quality test split.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250726-high/medsam_model_best.pth \
  --device cuda


 Found 28 image–mask pairs.
100% 28/28 [00:45<00:00,  1.62s/it]
Evaluation Results: {'dice': 0.49488651586164323, 'iou': 0.3523703773639032, 'precision': 0.5449804830630975, 'recall': 0.5200350544681507}


In [8]:
# Experiment 1: evaluate the MedSAM model trained on the low-quality train split
# on the low-quality test split.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-lowS/medsam_model_best.pth \
  --device cuda


 Found 28 image–mask pairs.
100% 28/28 [00:14<00:00,  1.87it/s]
Evaluation Results: {'dice': 0.4815975683087652, 'iou': 0.33079755838544334, 'precision': 0.5016562954673203, 'recall': 0.5644429938103965}


# DO NOT RUN THE FOLLOWING
without first finishing Experiment 3. The cell below loads the model trained in Experiment 3 (it is included here to help view the differences in results) and will give an error if that model does not exist yet.

In [None]:
# Evaluate the MedSAM model trained on mixed-quality images on the low-quality
# test split. The model comes from Experiment 3; the run is placed here to
# compare its results with the two runs above.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-mixedS/medsam_model_best.pth \
  --device cuda


✅ Found 28 image–mask pairs.
100% 28/28 [00:16<00:00,  1.69it/s]
📊 Evaluation Results: {'dice': 0.4848139827538814, 'iou': 0.338812602184979, 'precision': 0.5526763663760254, 'recall': 0.5142066744821412}


# Experiment 2: Noise Robustness Testing


The experiments use the test split of the low-quality images (same format as above; the Experiment 1 test is also repeated here without noise, to avoid potential variance between runs).

In [9]:
# High-quality-trained model, evaluated on the low-quality test split with
# noise added (--add_noise, variance 0.1).
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250726-high/medsam_model_best.pth \
  --device cuda \
  --add_noise \
  --noise_variance 0.1


Found 28 image–mask pairs.
100% 28/28 [00:19<00:00,  1.44it/s]
Evaluation Results: {'dice': 4.501978483706767e-11, 'iou': 4.501978483706767e-11, 'precision': 0.9285714417077134, 'recall': 4.5031755048596005e-11}


In [10]:
# High-quality-trained model, baseline run without --add_noise.
# --noise_variance is still passed but presumably has no effect without
# --add_noise: the recorded results equal the Experiment 1 test results.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250726-high/medsam_model_best.pth \
  --device cuda \
  --noise_variance 0.1

Found 28 image–mask pairs.
100% 28/28 [00:15<00:00,  1.78it/s]
Evaluation Results: {'dice': 0.49488651586164323, 'iou': 0.3523703773639032, 'precision': 0.5449804830630975, 'recall': 0.5200350544681507}


In [11]:
# Low-quality-trained model, evaluated on the low-quality test split with noise
# added (--add_noise, variance 0.1).
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-lowS/medsam_model_best.pth \
  --device cuda \
  --add_noise \
  --noise_variance 0.1


Found 28 image–mask pairs.
100% 28/28 [00:20<00:00,  1.38it/s]
Evaluation Results: {'dice': 0.005514042575159581, 'iou': 0.002854946435306393, 'precision': 0.55685350852101, 'recall': 0.0029052819099676464}


In [12]:
# Low-quality-trained model, baseline run without --add_noise.
# --noise_variance is still passed but presumably has no effect without
# --add_noise: the recorded results equal the Experiment 1 test results.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-lowS/medsam_model_best.pth \
  --device cuda \
  --noise_variance 0.1

Found 28 image–mask pairs.
100% 28/28 [00:16<00:00,  1.70it/s]
Evaluation Results: {'dice': 0.4815975683087652, 'iou': 0.33079755838544334, 'precision': 0.5016562954673203, 'recall': 0.5644429938103965}


# Experiment 3: Fine-tuning with mixed quality images - testing the strength of variety training in comparison to quality specific training

In [20]:
# To train the model, we need to create the mixed folder containing the high and low quality images (making sure to have an even mix of the two image qualities to avoid training bias)
import os
import shutil
import random

# Source datasets and the destination folder for the balanced mixed-quality set.
high_path = "/content/drive/MyDrive/MedSAM/data/npy_high_full"
low_path = "/content/drive/MyDrive/MedSAM/data/npy_low_split/train"
mixed_path = "/content/drive/MyDrive/MedSAM/data/npy_mixedS"
mixed_imgs = os.path.join(mixed_path, "imgs")
mixed_gts = os.path.join(mixed_path, "gts")

# NOTE: files already present in the destination are kept, so re-running this
# cell on a populated folder adds to (or overwrites) what is already there.
os.makedirs(mixed_imgs, exist_ok=True)
os.makedirs(mixed_gts, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so that the seeded
# sample below selects the same files on every run.
low_files = sorted(os.listdir(os.path.join(low_path, "imgs")))
high_files = sorted(os.listdir(os.path.join(high_path, "imgs")))

# Both sets are copied into the same folders under their original names, so a
# high-quality file sharing a name with a low-quality one would overwrite it and
# silently shrink the dataset. Only sample from names that cannot collide.
low_names = set(low_files)
high_candidates = [fname for fname in high_files if fname not in low_names]
if len(high_candidates) < len(low_files):
    raise ValueError(
        f"Need {len(low_files)} high-quality files to match the low-quality set, "
        f"but only {len(high_candidates)} usable files were found in {high_path}"
    )

# Match number of low-quality samples - don't want to introduce bias in training
random.seed(42)
high_sample = random.sample(high_candidates, len(low_files))

def copy_samples(file_list, src_img_dir, src_gt_dir, dst_img_dir, dst_gt_dir):
    """Copy image/mask pairs that share a file name into the destination folders.

    Every pair is checked before anything is copied, so a missing image or
    mask cannot leave an image without its mask in the destination.

    Args:
        file_list: File names present in both ``src_img_dir`` and ``src_gt_dir``.
        src_img_dir: Folder holding the source images.
        src_gt_dir: Folder holding the source masks.
        dst_img_dir: Existing folder that receives the images.
        dst_gt_dir: Existing folder that receives the masks.

    Raises:
        FileNotFoundError: If any listed image or mask is missing from its
            source folder; nothing is copied in that case.
    """
    names = list(file_list)

    # Validate up front: copying the image first and failing on the mask would
    # leave an orphaned image behind in the destination.
    missing = [
        path
        for fname in names
        for path in (os.path.join(src_img_dir, fname), os.path.join(src_gt_dir, fname))
        if not os.path.isfile(path)
    ]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} source file(s) missing, e.g. {missing[0]}"
        )

    for fname in names:
        shutil.copy(os.path.join(src_img_dir, fname), os.path.join(dst_img_dir, fname))
        shutil.copy(os.path.join(src_gt_dir, fname), os.path.join(dst_gt_dir, fname))

# Copy the whole low-quality set first, then the sampled high-quality files,
# into the shared imgs/gts folders of the mixed dataset.
for batch_files, source_root in ((low_files, low_path), (high_sample, high_path)):
    copy_samples(
        batch_files,
        os.path.join(source_root, "imgs"),
        os.path.join(source_root, "gts"),
        mixed_imgs,
        mixed_gts,
    )

# Summarise where the dataset was written and how it is composed.
print(f" Created mixed-quality dataset at: {mixed_path}")
print(f" {len(low_files)} low-quality + {len(high_sample)} high-quality samples")


 Created mixed-quality dataset at: /content/drive/MyDrive/MedSAM/data/npy_mixedS
 65 low-quality + 65 high-quality samples


In [None]:
# Now we can run the same training script with the mixed set of images.
# Training starts from the base medsam_vit_b.pth checkpoint, for 20 epochs with batch size 1.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/train_one_gpu_medsam_with_evaluation_collab.py \
  -i /content/drive/MyDrive/MedSAM/data/npy_mixedS \
  -checkpoint /content/drive/MyDrive/MedSAM/work_dir/MedSAM/medsam_vit_b.pth \
  --device cuda \
  -num_epochs 20 \
  -batch_size 1


2025-07-28 20:41:37.091847: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753735297.366006   30358 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753735297.442380   30358 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-28 20:41:38.021989: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
✅ Number of image-mask pairs: 130
100% 130/130 [02:36<00:00,  1.20s/it]
Epoch 0: Loss = 0.6356
Epoch 0 Metrics -> Dice: 0.433

# Run previous experiments using the new model

Running experiment 1 using mixed trained model

In [13]:
# Running Experiment 1 (inference_low_quality_eval_per.py) with the mixed-quality trained MedSAM
# (MedSAM-ViT-B-20250728-mixedS) on the low-quality test split.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-mixedS/medsam_model_best.pth \
  --device cuda

 Found 28 image–mask pairs.
100% 28/28 [00:15<00:00,  1.84it/s]
Evaluation Results: {'dice': 0.4848139827538814, 'iou': 0.338812602184979, 'precision': 0.5526763663760254, 'recall': 0.5142066744821412}


Running experiment 2 using mixed trained model

In [14]:
# Mixed-quality trained model (MedSAM-ViT-B-20250728-mixedS) on the low-quality test split,
# with --add_noise enabled and --noise_variance 0.1.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-mixedS/medsam_model_best.pth \
  --device cuda \
  --add_noise \
  --noise_variance 0.1



Found 28 image–mask pairs.
100% 28/28 [00:20<00:00,  1.35it/s]
Evaluation Results: {'dice': 3.792401371609374e-05, 'iou': 1.8967681739204775e-05, 'precision': 0.8928571522263439, 'recall': 1.8967681745539863e-05}


In [15]:
# Same mixed-quality trained checkpoint and test split as the previous cell, but without --add_noise.
# Presumably --noise_variance is unused in this run (NOTE(review): confirm against the script).
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_noise_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_low_split/test \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-mixedS/medsam_model_best.pth \
  --device cuda \
  --noise_variance 0.1


Found 28 image–mask pairs.
100% 28/28 [00:16<00:00,  1.66it/s]
Evaluation Results: {'dice': 0.4848139827538814, 'iou': 0.338812602184979, 'precision': 0.5526763663760254, 'recall': 0.5142066744821412}


# Experiment 4: Full C-TRUS Evaluation Using Fine-tuned Models

Since all the models are trained and the experiments have been run, I also ran Experiment 1 on the full C-TRUS dataset (as in the zero-shot evaluation), because it may be useful for comparing the fine-tuned models against MedSAM without fine-tuning.

In [16]:
# Running the experiment with the high-quality trained MedSAM (MedSAM-ViT-B-20250726-high)
# on the full dataset folder (npy_filtered_full) instead of the low-quality test split.
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_filtered_full \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250726-high/medsam_model_best.pth \
  --device cuda

 Found 508 image–mask pairs.
100% 508/508 [09:28<00:00,  1.12s/it]
Evaluation Results: {'dice': 0.7259940419128494, 'iou': 0.6163883613953363, 'precision': 0.7644775034342253, 'recall': 0.7282553862867671}


In [17]:
# Running the experiment with the low-quality trained MedSAM (MedSAM-ViT-B-20250728-lowS)
# on the full dataset folder (npy_filtered_full).
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_filtered_full \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-lowS/medsam_model_best.pth \
  --device cuda

 Found 508 image–mask pairs.
100% 508/508 [04:19<00:00,  1.96it/s]
Evaluation Results: {'dice': 0.6646983654351125, 'iou': 0.5244313150010069, 'precision': 0.7178379753569487, 'recall': 0.6688541508984649}


In [18]:
# Running the experiment with the mixed-quality trained MedSAM (MedSAM-ViT-B-20250728-mixedS)
# on the full dataset folder (npy_filtered_full).
!python /content/drive/MyDrive/MedSAM/fine_tune_scripts/inference_low_quality_eval_per.py \
  -data_path /content/drive/MyDrive/MedSAM/data/npy_filtered_full \
  -model_path /content/drive/MyDrive/MedSAM/work_dir/MedSAM-ViT-B-20250728-mixedS/medsam_model_best.pth \
  --device cuda

 Found 508 image–mask pairs.
100% 508/508 [04:15<00:00,  1.98it/s]
Evaluation Results: {'dice': 0.7426482660578578, 'iou': 0.625330237987521, 'precision': 0.7850671473333216, 'recall': 0.7390815396261526}


# Analysis of the Tests

The results are further analyzed using Wilcoxon signed-rank tests to check whether the differences are statistically significant. This was done using the wilcoxin_compare.py file, and the results can be found in the Results_analysis Jupyter notebook on my GitHub.

# Debugging Section

I realized that statistical analysis of the results requires per-image metrics, so I ran the same tests again with the per-image metrics saved. This lets me use them to back up my claims about the results.

# which checkpoint is which for the training
MedSAM-ViT-B-20250727-1932 - mixed training \
MedSAM-ViT-B-20250727-1655 - low training \
MedSAM-ViT-B-20250726-1827 - high training

MedSAM-ViT-B-20250727-mixed - mixed training \
MedSAM-ViT-B-20250727-low - low training

# the final version for testing
MedSAM-ViT-B-20250726-high - high training \
MedSAM-ViT-B-20250728-lowS --> fixed with split data \
MedSAM-ViT-B-20250728-mixedS --> fixed with split data

