In [None]:
import functions
import pandas as pd
import xml.etree.ElementTree as ET

In this case the channels are not stored in a separate file, and the mask is not iteratively numbered. The following code block performs the additional handling needed to extract the channel metadata and the cell segmentation mask.

In [None]:
# Inputs: a multi-band feature image and a separate cell segmentation mask.
feature_tiff_path = "/data/raw_data/lung_cancer/lung_cancer_features.tif"
mask_tiff_path = "/data/raw_data/lung_cancer/lung_cancer_segmentation_mask.tif"

img_data, metadata = read_tiff_all_bands(feature_tiff_path)  # This will have shape (height, width, num_channels)
mask_data = read_single_band(mask_tiff_path)

mask = preprocess_mask(mask_data)

# The mask and the feature image do not have precisely the same dimensions,
# so the feature image is cropped using the shape of the raw mask.
cropped_img_data = crop_tiff_to_match_mask(img_data, mask_data.shape)

# The channel names are read from the XML stored in the image description tag
# of the feature image.
xml_content = metadata['TIFFTAG_IMAGEDESCRIPTION']

# Parse the XML content
root = ET.fromstring(xml_content)

# Namespace used for the XPath query below (assumes the document declares the
# OME 2016-06 schema in the 'xmlns' of the <OME> element).
namespace = {'ome': 'http://www.openmicroscopy.org/Schemas/OME/2016-06'}

# Collect one record per <Channel> element; the namespace prefix is required
# in the XPath query.
channels = []
for channel in root.findall('.//ome:Channel', namespace):
    channel_id = channel.get('ID').split(':')[1]
    channel_name = channel.get('Name')

    # Each channel is stored as a dictionary; the keys become the DataFrame columns
    channels.append({
        '': channel_id,
        'channel': channel_name,
    })

# Convert list of dictionaries to pandas DataFrame
channels_df = pd.DataFrame(channels)[:48] # here removing 2 non-channels

# Not including channels that will likely only lead to noise
noisy_channels = {0,1,2,3,4,5,6,39,40,41,42,43,44,45,46,47}
# removed channels: 80ArAr, 89Y, 120Sn, 127I, 131Xe, 138Ba, 140Ce, 189Os, 190BCKG, 191Ir_DNA1, 193Ir_DNA2, ICSK1, ICSK2, ICSK3, 202Hg, 208Pb

# Calculate remaining channels by excluding noisy ones.
# sorted() is used because set iteration order is not guaranteed to be
# ascending, and this list determines the band order of the filtered image.
all_channels = set(range(cropped_img_data.shape[2]))
remaining_channels = sorted(all_channels - noisy_channels)

# Every remaining band index must have a row in channels_df, otherwise the
# positional lookup below fails with an opaque out-of-bounds IndexError.
unnamed_channels = [idx for idx in remaining_channels if idx >= len(channels_df)]
if unnamed_channels:
    raise ValueError(
        f"Image band indices {unnamed_channels} have no matching row in "
        f"channels_df ({len(channels_df)} rows); the image has "
        f"{cropped_img_data.shape[2]} bands."
    )

# Select the non-noisy channels, keeping image bands and channel rows aligned
filtered_img_data = cropped_img_data[:, :, remaining_channels]
channels_df = channels_df.iloc[remaining_channels]

data, cell_ids, centroids, dataScaleSize, cellSizes = extract_data_from_gdal_tiffs(filtered_img_data, mask)

The extracted data can then be used to run UTAG directly:

In [None]:
# Channels to include in the analysis: every band of the already-filtered image
remaining_channels = list(range(filtered_img_data.shape[2]))
unmod_adata = tif_create_data(
    filtered_img_data,
    cell_ids,
    centroids,
    data,
    channels_df,
    remaining_channels,
)

# Run UTAG on the data without spillover compensation
utag_unmod = utag(
    unmod_adata,
    slide_key="roi",
    max_dist=15,
    normalization_mode='l1_norm',
    clustering_method='parc',
    resolutions=[0.3],
)

Alternatively, REDSEA spillover compensation can be applied before clustering with UTAG:

In [None]:
# Running REDSEA
# The per-cell sizes come from the extraction cell, which binds them to
# `cellSizes`; the previously used name `cell_sizes` is never defined and
# raised a NameError on a fresh kernel.
data_compen_scale_size_cells, means, stdevs, valid_indices = redsea_compensation(
    data,
    mask,
    filtered_img_data,
    channels_df,
    feature_tiff_path,
    cellSizes,
    element_shape=2,
    element_size=2,
)

# Build the AnnData object from the compensated data
redsea_adata = create_anndata(data_compen_scale_size_cells, cell_ids, centroids, means, stdevs, channels_df, valid_indices)

# Running UTAG on the REDSEA-compensated data
utag_redsea = utag(
    redsea_adata,
    slide_key="roi",
    max_dist=15,
    normalization_mode='l1_norm',
    apply_clustering=True,
    clustering_method = 'parc',
    resolutions = [0.3]
)