### Mount GDrive

In [1]:
# Mount Google Drive into the Colab filesystem. Every path used later in this
# notebook lives under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Install and Import SAM

In [None]:
# Work inside the project folder on Drive so the clone below lands there.
%cd "/content/drive/MyDrive/Final IDCC-SAM/"

# Clone the SAM repository and install it in editable mode from the clone.
# NOTE(review): on a re-run the clone directory already exists, so `git clone`
# is expected to report an error and leave the existing checkout untouched.
!git clone https://github.com/facebookresearch/segment-anything.git
!cd segment-anything; pip install -e.

# Also install SAM directly from GitHub (a regular, non-editable install).
# NOTE(review): this looks redundant with the editable install above; presumably
# it makes `segment_anything` importable without restarting the runtime.
# Confirm before removing either line.
!pip install git+https://github.com/facebookresearch/segment-anything.git

In [3]:
import os
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
from segment_anything import sam_model_registry,  SamAutomaticMaskGenerator, SamPredictor

In [4]:
if not os.path.exists("/content/drive/MyDrive/Final IDCC-SAM/segment-anything/sam_vit_h_4b8939.pth"):
  !wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth -O /content/drive/MyDrive/Final IDCC-SAM/segment-anything/sam_vit_h_4b8939.pth

### Define SAM Mask Generator Functions

In [5]:
# This script defines a function to visualize annotations (masks) and provides functionality
# to use the Segment Anything Model (SAM) for generating automatic masks.

import sys
sys.path.append("..")
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Path to the SAM model checkpoint, downloaded during installation
sam_checkpoint = "/content/drive/MyDrive/Final IDCC-SAM/segment-anything/sam_vit_h_4b8939.pth"

# Registry key of the ViT-H backbone; it must match the checkpoint file above
model_type = "vit_h"

# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on a runtime without CUDA instead of crashing in `sam.to(...)`.
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the SAM model from the registry using the provided checkpoint
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

# Automatic mask generator used by the evaluation and visualisation cells.
# pred_iou_thresh discards masks whose predicted quality score is below 0.8.
mask_generator = SamAutomaticMaskGenerator(
    model=sam,
    pred_iou_thresh=0.8,
)
def show_anns(anns):
    """Overlay annotation masks on the current matplotlib axes.

    Masks are drawn from largest to smallest area so that small masks end up
    on top of the large ones that contain them. Each mask gets a random colour.

    Parameters:
        anns (list): Annotations; each item is a mapping with at least
            'segmentation' (2-D mask array) and 'area' (used for ordering).

    Returns:
        None. Draws onto the axes returned by ``plt.gca()``; does nothing when
        ``anns`` is empty.
    """
    if len(anns) == 0:
        return

    # Sort annotations based on area in descending order
    sorted_anns = sorted(anns, key=lambda ann: ann['area'], reverse=True)

    ax = plt.gca()
    # Keep the axis limits of whatever image is already displayed
    ax.set_autoscale_on(False)

    for ann in sorted_anns:
        mask = ann['segmentation']
        # One random RGB colour per mask, broadcast over the whole image
        color_mask = np.random.random(3)
        overlay = np.ones((mask.shape[0], mask.shape[1], 3)) * color_mask
        # The mask becomes the alpha channel: 0.35 inside the mask, 0 elsewhere
        ax.imshow(np.dstack((overlay, mask * 0.35)))

### Zero-Shot Segmentation: run SAM on all images in a loop and generate masks

In [6]:
import cv2
import numpy as np
import os
import pandas as pd

# Path to the folder containing the images
folder_path = '/content/drive/MyDrive/Final IDCC-SAM/Dataset/IDCC_Images/Test_clahe/'

# Get the list of image filenames and sort them in ascending order
image_filenames = sorted(os.listdir(folder_path))

# Load the ground truth Excel file (columns used: 'filename', 'number_of_cells')
excel_file_path = '/content/drive/MyDrive/Final IDCC-SAM/Dataset/summarized ground truths/overall/overall_ground_truth.xlsx'
ground_truth_df = pd.read_excel(excel_file_path)

# One summary record per image; converted to a DataFrame once, after the loop
summary_data = []

# Loop through each image
for filename in image_filenames:
    # Load the image as single-channel grayscale
    image_path = os.path.join(folder_path, filename)
    original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if original_image is None:
        # cv2.imread returns None for anything it cannot decode (sub-folders,
        # stray non-image files); skip it instead of crashing later.
        print(f'Skipping unreadable file: {image_path}')
        continue

    # The image is single-channel, so expand it with GRAY2RGB.
    # BGR2RGB requires a 3- or 4-channel input and fails on grayscale data.
    image = cv2.cvtColor(original_image, cv2.COLOR_GRAY2RGB)
    masks = mask_generator.generate(image)

    # Ground-truth rows are keyed by '<image name>.csv'
    image_name = os.path.splitext(filename)[0]  # Remove file extension
    matching_row = ground_truth_df[ground_truth_df['filename'] == image_name + '.csv']
    num_cells_gt = matching_row['number_of_cells'].values[0] if not matching_row.empty else None

    # Only compute the count error when a ground truth exists; a missing one is
    # stored as an empty cell so the column stays numeric for later analysis.
    abs_diff = abs(num_cells_gt - len(masks)) if num_cells_gt is not None else None

    # Add summary data to list
    summary_data.append({
        'Image Name': image_name,
        'Ground Truth': num_cells_gt,
        'Prediction': len(masks),
        'Absolute Difference': abs_diff
    })

# Create DataFrame from summary data; explicit columns keep the schema stable
# even when no image could be processed.
summary_df = pd.DataFrame(
    summary_data,
    columns=['Image Name', 'Ground Truth', 'Prediction', 'Absolute Difference'],
)

# Save summary data to Excel file, creating the results folder if needed
summary_excel_path = '/content/drive/MyDrive/Final IDCC-SAM/IDCC-SAM Test Results/IDCC-SAM final prediction_summary.xlsx'
os.makedirs(os.path.dirname(summary_excel_path), exist_ok=True)
summary_df.to_excel(summary_excel_path, index=False)

Label each prediction with its image type

In [7]:
import pandas as pd

# Read the prediction summary workbook (the same path the zero-shot evaluation
# cell writes its summary to).
excel_file = '/content/drive/MyDrive/Final IDCC-SAM/IDCC-SAM Test Results/IDCC-SAM final prediction_summary.xlsx'
df = pd.read_excel(excel_file)

# Add a new column "Image Type" with initial value as empty string.
# This is only a placeholder: the column is reassigned in full further down.
df['Image Type'] = ''

# Function to label Image Type based on Image Name
def label_image_type(image_name):
    """Map an image name to its stain/marker label.

    Matching is case-insensitive and substring-based:

    * any name containing "dapi" is labelled 'DAPI', whatever else it contains;
    * otherwise the first marker found, in the order GFAP, KI67, MAP2ab,
      Nestin, RIP, TuJ1, decides the label;
    * names matching nothing fall back to 'DAPI'.

    Parameters:
        image_name: Image name; coerced with ``str()`` so that numeric or
            missing values read back from Excel do not raise.

    Returns:
        str: One of 'DAPI', 'GFAP', 'KI67', 'MAP2ab', 'Nestin', 'RIP', 'TuJ1'.
    """
    # Convert image name to lowercase for case-insensitive comparison
    image_name_lower = str(image_name).lower()

    # A DAPI mention always wins, so no per-marker "dapi not in name" checks are needed
    if 'dapi' in image_name_lower:
        return 'DAPI'

    # (substring to look for, label to return); order defines precedence
    marker_labels = (
        ('gfap', 'GFAP'),
        ('ki67', 'KI67'),
        ('map2ab', 'MAP2ab'),
        ('nestin', 'Nestin'),
        ('rip', 'RIP'),
        ('tuj1', 'TuJ1'),
    )
    for substring, label in marker_labels:
        if substring in image_name_lower:
            return label

    # No known marker in the name: treated as DAPI
    return 'DAPI'

# Apply the function to label Image Type based on Image Name
# (replaces the placeholder values in the "Image Type" column row by row).
df['Image Type'] = df['Image Name'].apply(label_image_type)

# Save the updated DataFrame to the Excel file.
# This overwrites the workbook that was read at the top of the cell.
df.to_excel(excel_file, index=False)

print("Image Type column added and labeling completed.")


Image Type column added and labeling completed.


In [8]:
import pandas as pd

# Read the prediction summary workbook; the analysis below relies on its
# "Image Type" and "Absolute Difference" columns.
excel_file = '/content/drive/MyDrive/Final IDCC-SAM/IDCC-SAM Test Results/IDCC-SAM final prediction_summary.xlsx'
df = pd.read_excel(excel_file)

# Function to calculate the analysis
def calculate_analysis(group, max_abs_diff=10):
    """Return the fraction of rows whose count error is within a tolerance.

    Parameters:
        group (pandas.DataFrame): Rows to evaluate; must contain an
            "Absolute Difference" column.
        max_abs_diff (int | float): Largest absolute difference (inclusive)
            that still counts as acceptable. Defaults to 10.

    Returns:
        float: Rows with "Absolute Difference" <= ``max_abs_diff`` divided by
        the total number of rows, or 0 for an empty group. Rows with a missing
        difference never satisfy the comparison, so they count as misses.
    """
    within_tolerance = group['Absolute Difference'] <= max_abs_diff
    total_count = len(group)
    if total_count == 0:
        # Avoid a division by zero on an empty group
        return 0
    return int(within_tolerance.sum()) / total_count

# Group by "Image Type" and apply the analysis function.
# The result holds one ratio per image type, indexed by the type label.
analysis_result = df.groupby('Image Type').apply(calculate_analysis)

# Add the analysis result to the DataFrame: each row receives the ratio of its
# own image type, so the value repeats for all rows of the same type.
df['Analysis Result'] = df['Image Type'].map(analysis_result)

# Save the DataFrame back to the Excel file (overwrites the workbook read above)
df.to_excel(excel_file, index=False)

print("Analysis result saved to Excel file.")


Analysis result saved to Excel file.


### For visualization purposes, generate and save the final segmentation output images from SAM

Segment with multicolored markers

In [9]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

# Function to generate random colors
def generate_random_colors(num_colors):
    """Return a (num_colors, 3) array of random RGB triples in [0, 1)."""
    shape = (num_colors, 3)
    return np.random.random(shape)


# Path to the folder containing the images
folder_path = '/content/drive/MyDrive/Final IDCC-SAM/Dataset/IDCC_Images/Test_clahe/'

# Get the list of image filenames and sort them in ascending order
image_filenames = sorted(os.listdir(folder_path))

# Load the ground truth Excel file (columns used: 'filename', 'number_of_cells')
excel_file_path = '/content/drive/MyDrive/Final IDCC-SAM/Dataset/summarized ground truths/overall/overall_ground_truth.xlsx'
ground_truth_df = pd.read_excel(excel_file_path)

# One summary record per image (kept in memory only; this cell does not save it)
summary_data = []

# Create a directory to save the plots as images
output_folder = '/content/drive/MyDrive/Final IDCC-SAM/IDCC-SAM Test Results/final 100 segmentation-output-images'
os.makedirs(output_folder, exist_ok=True)

for filename in image_filenames:
    # Load the image as single-channel grayscale
    image_path = os.path.join(folder_path, filename)
    original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if original_image is None:
        # cv2.imread returns None for anything it cannot decode; skip it
        print(f'Skipping unreadable file: {image_path}')
        continue

    # The image is single-channel, so expand it with GRAY2RGB.
    # BGR2RGB requires a 3- or 4-channel input and fails on grayscale data.
    image = cv2.cvtColor(original_image, cv2.COLOR_GRAY2RGB)
    masks = mask_generator.generate(image)

    # Ground-truth rows are keyed by '<image name>.csv'
    image_name = os.path.splitext(filename)[0]  # Remove file extension
    matching_row = ground_truth_df[ground_truth_df['filename'] == image_name + '.csv']
    gt_known = not matching_row.empty
    num_cells_gt = matching_row['number_of_cells'].values[0] if gt_known else None

    # Only compute the count error when a ground truth exists
    abs_diff = abs(num_cells_gt - len(masks)) if gt_known else None

    # Text shown in the plot titles. The prediction is always known, so its
    # label no longer depends on whether a ground truth was found.
    gt_label = num_cells_gt if gt_known else 'Unknown'
    num_cells = f'Pred: {len(masks)}'
    abs_diff_str = f'Abs Diff: {abs_diff}' if gt_known else 'Abs Diff: Unknown'

    # Add summary data to list
    summary_data.append({
        'Image Name': image_name,
        'Ground Truth': num_cells_gt,
        'Prediction': len(masks),
        'Absolute Difference': abs_diff
    })

    # Left panel: the original image; right panel: one dot per detected mask
    fig, (ax_original, ax_marked) = plt.subplots(1, 2, figsize=(8, 4))

    ax_original.imshow(original_image, cmap='gray')
    ax_original.set_title(f'Original Image\nNumber of Cells: {gt_label}')
    ax_original.axis('off')

    ax_marked.imshow(original_image, cmap='gray')
    ax_marked.set_title('Mask \n' + num_cells + '\n' + abs_diff_str)
    colors = generate_random_colors(len(masks))  # One random colour per mask
    for color, mask in zip(colors, masks):
        # 'bbox' is (x, y, width, height); place the marker at the box centre
        box_x, box_y, box_w, box_h = mask['bbox'][:4]
        ax_marked.plot(
            box_x + box_w / 2,
            box_y + box_h / 2,
            marker="o",
            markersize=4,
            markeredgecolor="white",
            markerfacecolor=color,
        )
    ax_marked.axis('off')

    # Save the plot as an image, then close the figure so figures from earlier
    # iterations do not accumulate in memory
    output_filename = os.path.join(output_folder, f'{image_name}_plot.tiff')
    fig.savefig(output_filename, bbox_inches='tight')
    plt.close(fig)


### End