In [1]:
import numpy as np
from PIL import Image
import os
import tifffile
from scipy.ndimage import label, generate_binary_structure, find_objects
import pandas as pd
from skimage.morphology import binary_dilation

In [2]:
# Convert .npz coordinate files into .tif mask images.
# Define the directories
# NOTE(review): both right-hand sides are blank placeholders in this copy of the
# notebook; assign real directory paths before running this cell.
npz_folder = # npz folder
tif_folder = # tif folder

def extract_npz_data(npz_file_path):
    """Collect the (x, y) coordinate pairs stored in one .npz archive.

    Only arrays whose shape is exactly (12, 1, 2) are used; each one is
    flattened to (12, 2) and the results are stacked in archive order.
    Arrays of any other shape are ignored.

    Parameters:
    npz_file_path (str): Path of the .npz file to read.

    Returns:
    np.ndarray: Array of shape (12 * k, 2) for k matching arrays, or an empty
    1-D array when the archive contains no matching array.
    """
    with np.load(npz_file_path) as data:
        coordinate_blocks = []

        # Arrays are looked up by the names the archive itself reports, so
        # archives saved with keyword names (not only arr_0, arr_1, ...) work.
        for array_name in data.files:
            array = data[array_name]

            if array.shape == (12, 1, 2):
                # Drop the singleton middle axis to get plain (x, y) rows.
                coordinate_blocks.append(array.reshape(12, 2))

    if coordinate_blocks:
        return np.vstack(coordinate_blocks)
    return np.array([])

def reconstruct_image_from_coords(coords, shape):
    """Rasterize (x, y) points into a black image with white (255) pixels.

    Parameters:
    coords (array-like): Integer (x, y) pairs, x being the column index and y
        the row index. May be empty.
    shape (tuple): (rows, cols) of the image to create.

    Returns:
    np.ndarray: uint8 image of the given shape holding 255 at every in-bounds
    coordinate and 0 elsewhere. Out-of-bounds coordinates are skipped.
    """
    image = np.zeros(shape, dtype=np.uint8)

    points = np.asarray(coords)
    if points.size == 0:
        return image

    points = points.reshape(-1, 2)
    xs, ys = points[:, 0], points[:, 1]

    # Pixels are written as image[y, x], so x is bounded by the number of
    # columns (shape[1]) and y by the number of rows (shape[0]).
    in_bounds = (xs >= 0) & (xs < shape[1]) & (ys >= 0) & (ys < shape[0])
    image[ys[in_bounds], xs[in_bounds]] = 255

    return image

def process_npz_files(npz_folder, tif_folder, image_shape=(2048, 2448)):
    """Render every .npz coordinate file in a folder as a .tif mask image.

    For each ``*.npz`` file the coordinates are read with
    ``extract_npz_data``, drawn with ``reconstruct_image_from_coords`` and
    written to ``tif_folder`` under the same base name with a ``.tif``
    extension.

    Parameters:
    npz_folder (str): Folder containing the .npz files.
    tif_folder (str): Destination folder; created if it does not exist.
    image_shape (tuple): (rows, cols) of the generated images. Defaults to
        (2048, 2448), the shape that was previously hard-coded.
    """
    os.makedirs(tif_folder, exist_ok=True)

    for file in os.listdir(npz_folder):
        if not file.endswith(".npz"):
            continue

        coordinates = extract_npz_data(os.path.join(npz_folder, file))
        image = reconstruct_image_from_coords(coordinates, image_shape)

        # Swap only the extension; str.replace would also rewrite a ".npz"
        # occurring in the middle of the file name.
        tif_name = os.path.splitext(file)[0] + ".tif"

        # imwrite is the current tifffile writer (imsave is its deprecated
        # alias) and is what the other cells of this notebook already use.
        tifffile.imwrite(os.path.join(tif_folder, tif_name), image)

# Run the conversion for the folders configured at the top of this cell.
process_npz_files(npz_folder, tif_folder)

  tifffile.imsave(tif_file_path, image)


In [3]:
def is_surrounded_by_255(image, component_mask):
    """Tell whether everything outside a component's padded bounding box is 255.

    The bounding box of the first object found in ``component_mask`` is grown
    by one pixel on each side (clipped to the image), and every pixel of
    ``image`` lying outside that box must equal 255 for the result to be true.

    NOTE(review): despite the name, this inspects the whole image outside the
    box, not only the ring of pixels bordering the component; confirm that
    this is the intended test.
    """
    first_box = find_objects(component_mask)[0]
    row_span, col_span = first_box[0], first_box[1]

    # Pad the box by one pixel, clipped to the image limits.
    top = max(row_span.start - 1, 0)
    bottom = min(row_span.stop + 1, image.shape[0])
    left = max(col_span.start - 1, 0)
    right = min(col_span.stop + 1, image.shape[1])

    # Mark the padded box, then examine its complement.
    inside_box = np.zeros(image.shape, dtype=bool)
    inside_box[top:bottom, left:right] = True

    return np.all(image[~inside_box] == 255)


In [4]:
def process_image(image, keep_size=13):
    """Set to 255 every zero-valued region that should not be kept.

    Works in two passes and modifies ``image`` in place:

    1. Each connected region of zeros for which ``is_surrounded_by_255`` is
       true is filled with 255. Regions are visited in label order and each
       check sees the fills already made for earlier regions.
    2. The remaining zero regions are relabelled and every region whose pixel
       count differs from ``keep_size`` is filled with 255.

    Parameters:
    image (np.ndarray): Image to clean; it is mutated and also returned.
    keep_size (int): Pixel count of the zero regions to keep. Defaults to 13,
        the value that was previously hard-coded.

    Returns:
    np.ndarray: The same array object that was passed in.
    """
    labeled, num_features = label(image == 0)
    for i in range(1, num_features + 1):
        component_mask = (labeled == i)
        if is_surrounded_by_255(image, component_mask):
            image[component_mask] = 255

    labels, num_features = label(image == 0)
    sizes = np.bincount(labels.ravel(), minlength=num_features + 1)

    # Per-label lookup table: True for the regions to erase. Index 0 is the
    # background of the labelling (non-zero pixels) and is never erased.
    erase = sizes != keep_size
    erase[0] = False

    # One pass over the image instead of one full-image comparison per label.
    image[erase[labels]] = 255
    return image

In [5]:
def process_tif_files(input_folder, output_folder):
    """Run ``process_image`` on every .tif in a folder and save the results.

    Each ``*.tif`` file found in ``input_folder`` is read, passed through
    ``process_image`` and written to ``output_folder`` under the same file
    name. The output folder is created when it does not exist yet.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    tif_names = (name for name in os.listdir(input_folder) if name.endswith(".tif"))
    for tif_name in tif_names:
        source_path = os.path.join(input_folder, tif_name)
        target_path = os.path.join(output_folder, tif_name)

        # Read, clean, and write back under the same name.
        cleaned = process_image(tifffile.imread(source_path))
        tifffile.imwrite(target_path, cleaned)

In [6]:
# Run process_tif_files: read .tif files from input_folder, write results to output_folder.
# NOTE(review): the two paths below are blank placeholders in this copy of the
# notebook; assign real directory paths before running this cell.
input_folder = #input folder
output_folder = #output folder

process_tif_files(input_folder, output_folder)

In [7]:
# Folders for the 0 <-> 255 inversion pass in this cell.
# NOTE(review): both right-hand sides are blank placeholders in this copy of the
# notebook; assign real directory paths before running this cell.
output_folder = #output folder
input_folder = #input folder

# Invert every .tif in input_folder (0 becomes 255, 255 becomes 0, any other
# value is left unchanged) and save it under the same name in output_folder.
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for file_name in os.listdir(input_folder):
    if not file_name.endswith(".tif"):
        continue

    input_path = os.path.join(input_folder, file_name)
    output_path = os.path.join(output_folder, file_name)

    image_array = tifffile.imread(input_path)

    # Both masks are taken from the untouched original, so the second
    # assignment cannot undo the first.
    was_zero = image_array == 0
    was_255 = image_array == 255

    swapped_image = np.copy(image_array)
    swapped_image[was_zero] = 255
    swapped_image[was_255] = 0
    image = swapped_image

    # Save the inverted image
    tifffile.imwrite(output_path, image)

In [2]:
def find_connected_groups(input_array, shape):
    """
    Group the non-zero pixels of an array into 8-connected components.

    Parameters:
    input_array (np.ndarray): 2-D array; every non-zero element is foreground.
    shape (tuple): Unused; kept so existing callers keep working.

    Returns:
    tuple: (connected_groups, num_features), where connected_groups is a list
    of sets, one per component in label order, each holding the (row, col)
    index tuples of that component, and num_features is the component count.
    """
    # A full 3x3 structuring element makes diagonal neighbours connected.
    structure = np.ones((3, 3), dtype=int)
    labeled_array, num_features = label(input_array, structure)

    # Visit the foreground once and route each pixel to its component, rather
    # than scanning the whole array again for every label.
    connected_groups = [set() for _ in range(num_features)]
    foreground = labeled_array > 0
    pixel_coords = np.argwhere(foreground).tolist()
    pixel_labels = labeled_array[foreground].tolist()
    for coord, component_id in zip(pixel_coords, pixel_labels):
        connected_groups[component_id - 1].add(tuple(coord))

    return connected_groups, num_features

In [3]:
def count_overlapping_components(comp_list, coord_groups):
    """
    Count the components that share coordinates with two or more groups.

    Parameters:
    comp_list (list of sets): Each set contains coordinates of a connected component.
    coord_groups (list of sets): Each set contains coordinates of a connected group.

    Returns:
    int: How many components in comp_list intersect more than one group.
    """
    def groups_touched(component):
        # Number of coordinate groups having a non-empty intersection with the component.
        return sum(bool(component & group) for group in coord_groups)

    return sum(1 for component in comp_list if groups_touched(component) > 1)

In [4]:
def count_unique_overlapping_groups(comp_list, coord_groups):
    """
    Count the distinct coordinate groups that overlap with at least one component.

    Two groups holding exactly the same coordinates are counted once.

    Parameters:
    comp_list (list of sets): Each set contains coordinates of a connected component.
    coord_groups (list of sets): Each set contains coordinates of a connected group.

    Returns:
    int: The number of distinct coordinate groups that overlap with at least one component set.
    """
    # frozenset gives the same "equal contents" de-duplication as a sorted
    # tuple without sorting every group (and without requiring the coordinates
    # to be orderable). isdisjoint stops at the first shared coordinate
    # instead of building the whole intersection.
    overlapping_groups = {
        frozenset(group_set)
        for group_set in coord_groups
        if any(not group_set.isdisjoint(comp_set) for comp_set in comp_list)
    }

    return len(overlapping_groups)

In [5]:
def calculate_ratio(image):
    """Return (number of pixels equal to 0) / (number of pixels equal to 255).

    ``image`` may be anything ``np.array`` accepts. When no pixel equals 255
    the ratio is undefined and ``float('inf')`` is returned instead.
    """
    pixels = np.array(image)

    # Original author's note: look at input, change to 1 for output
    count_255 = np.sum(pixels == 255)
    if count_255 == 0:
        # Nothing to divide by; signal an undefined ratio.
        return float('inf')

    count_0 = np.sum(pixels == 0)
    return count_0 / count_255

# def process_images(folder_path):
#     ratios = []
    
#     # Loop over all files in the folder
#     for filename in os.listdir(folder_path):
#         if filename.endswith('.tif'):
#             file_path = os.path.join(folder_path, filename)
#             # Open image
#             with Image.open(file_path) as img:
#                 ratio = calculate_ratio(img)
#                 ratios.append(ratio)
    
#     # Calculate the average ratio
#     average_ratio = np.mean(ratios)
#     return average_ratio

# Specify the folder path containing the TIF images
#folder_path = '/work/users/d/p/dpguilba/snu16_out_masks'

# Get the average ratio
# average_ratio = process_images(folder_path)
# average_ratio = average_ratio*25

In [6]:
# Column order of the per-image metrics table; each row built in the evaluation
# loop lists its values in this same order.
columns = ['Image name','Pred count','Actual count','Accuracy','Precision','Recall','Matthew','F1','TP','TN','FP','FN']

In [7]:
# Folders compared by the evaluation in this cell: masks from input_tif_folder
# supply the "Actual count", masks from output_tif_folder the "Pred count".
# NOTE(review): both right-hand sides are blank placeholders in this copy of the
# notebook; assign real directory paths before running this cell.
input_tif_folder = #input folder
output_tif_folder = #output folder

# Reference and predicted masks are paired by their position in the sorted
# file lists, so both folders must contain the same number of .tif files.
input_tif_files = sorted(f for f in os.listdir(input_tif_folder) if f.endswith('.tif'))
output_tif_files = sorted(f for f in os.listdir(output_tif_folder) if f.endswith('.tif'))

df = pd.DataFrame(columns=columns)
rows = []  # one list of values per processed image, in `columns` order

# Check if the number of files matches
if len(input_tif_files) != len(output_tif_files):
    raise ValueError("The number of input .tif files and output .tif files do not match.")

for input_tif_file, output_tif_file in zip(input_tif_files, output_tif_files):

    input_tif_file_path = os.path.join(input_tif_folder, input_tif_file)
    output_tif_file_path = os.path.join(output_tif_folder, output_tif_file)

    tif_file_name = os.path.basename(output_tif_file)

    # Close each file as soon as its pixels have been copied into an array.
    with Image.open(input_tif_file_path) as input_tif_image:
        input_array = np.array(input_tif_image) #(2048,2448) all 0s and 255s

    with Image.open(output_tif_file_path) as output_tif_image:
        out_array = np.array(output_tif_image) #(2048,2448) all 0s and 1s

    if input_array.shape != out_array.shape:
        raise ValueError(
            f"Shape mismatch between {input_tif_file} {input_array.shape} "
            f"and {output_tif_file} {out_array.shape}."
        )

    # Predicted components. label() reports the component count itself.
    # NOTE(review): this uses scipy's default connectivity while
    # find_connected_groups uses a full 3x3 structure; confirm that the
    # difference is intended.
    labeled_out_array, num_comp = label(out_array)

    # Build one set of (row, col) tuples per predicted component in a single
    # pass over the foreground pixels.
    comp_list = [set() for _ in range(num_comp)]
    predicted = labeled_out_array > 0
    for coord, comp_id in zip(np.argwhere(predicted).tolist(),
                              labeled_out_array[predicted].tolist()):
        comp_list[comp_id - 1].add(tuple(coord))

    coord_groups, num_features = find_connected_groups(input_array, out_array.shape)

    count_of_1s = count_unique_overlapping_groups(comp_list, coord_groups)  #num of GTs that overlap with at least 1 component

    num_over1 = count_overlapping_components(comp_list, coord_groups) #number of components that overlap with more than 1 GT

    true_pos = count_of_1s - num_over1
    false_pos = num_comp - true_pos
    false_neg = num_features - true_pos

    # True negatives start from the number of pixels that are zero in both the
    # input and the output; counted directly on the arrays.
    num_common_zeros = int(np.count_nonzero((out_array == 0) & (input_array == 0)))

    # The pixel count is weighted down by 25 * (zeros / 255s of the input).
    # NOTE(review): the factor 25 is an unexplained constant; confirm its origin.
    ratio = calculate_ratio(input_array)
    ratio = ratio*25
    # ratio is 0 only when the input has no zero pixel, in which case there
    # are no common zeros either; avoid the 0/0 division.
    true_neg = int(np.round(num_common_zeros/ratio)) if ratio != 0 else 0

    # Each metric falls back to 'na' when its denominator is zero.
    total = true_pos + true_neg + false_pos + false_neg
    accuracy = (true_pos + true_neg) / total if total != 0 else 'na'

    recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) != 0 else 'na'

    precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) != 0 else 'na'

    numerator = true_pos * true_neg - false_pos * false_neg
    denominator = np.sqrt((true_pos + false_pos) * (true_pos + false_neg) * (true_neg + false_pos) * (true_neg + false_neg))
    matthew_coeff = numerator / denominator if denominator != 0 else 'na'

    if precision != 'na' and recall != 'na' and (precision + recall) != 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 'na'

    rows.append([tif_file_name,num_comp,num_features,accuracy,precision,recall,matthew_coeff,f1,true_pos,true_neg,false_pos,false_neg])

    # Rebuild the table from plain records and rewrite the CSV after every
    # image, so partial results are on disk if a later image fails.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv('accuracy_met_full_split5.csv', index=False)

In [13]:
#merge training labels and model output for manual analysis
#training labels = red, MIA output = blue, overlap = green

# NOTE(review): the three paths below are blank placeholders in this copy of the
# notebook; set the two source .tif paths and the destination file before running.
input_tif = #
output_tif = #
output_path = #

# Load both masks, closing each file once its pixels are copied into an array.
with Image.open(input_tif) as input_tif_image:
    input_array = np.array(input_tif_image) #(2048,2448) all 0s and 255s

with Image.open(output_tif) as output_tif_image:
    out_array = np.array(output_tif_image) #(2048,2448) all 0s and 1s

# Bring the 0/1 output onto the same 0/255 scale as the input.
out_array[out_array == 1] = 255

# Size the RGB canvas from the mask itself instead of assuming 2048 x 2448.
C = np.zeros(input_array.shape + (3,), dtype=np.uint8)
C[input_array == 255] = [255, 0, 0]                          # input only: red
C[out_array == 255] = [0, 0, 255]                            # output: blue
C[(input_array == 255) & (out_array == 255)] = [0, 255, 0]   # both: green

image = C

tifffile.imwrite(output_path, image)

In [16]:
#investigate true negatives

# NOTE(review): the paths below are blank placeholders in this copy of the
# notebook; output_path is not used by the rest of this cell.
input_tif = #
output_tif = #
output_path = #

# Load both masks, closing each file once its pixels are copied into an array.
with Image.open(input_tif) as input_tif_image:
    input_array = np.array(input_tif_image) #(2048,2448) all 0s and 255s

with Image.open(output_tif) as output_tif_image:
    out_array = np.array(output_tif_image) #(2048,2448) all 0s and 1s

if input_array.shape != out_array.shape:
    raise ValueError(f"Shape mismatch: {input_array.shape} vs {out_array.shape}.")

# Number of zeros in both input and output, counted directly on the arrays
# rather than by intersecting two sets of coordinate tuples.
num_common_zeros = int(np.count_nonzero((input_array == 0) & (out_array == 0)))

true_neg = num_common_zeros #this needs to be weighted
print(true_neg)

4993300
