In [40]:
import tifffile
from tifffile import imwrite
from pathlib import Path
import numpy as np
import pandas as pd

from readimc import MCDFile, TXTFile

from tempfile import TemporaryDirectory
from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union

import re
from os import PathLike

In [88]:
def create_panels_from_mcd_file(mcd_file: Union[str, PathLike]) -> List[pd.DataFrame]:
    panels = []
    with MCDFile(mcd_file) as f:
        i = 0
        for slide in f.slides:
            print('slide idx ', i)
            j = 0
            for acquisition in slide.acquisitions:
                #print('acquisition idx ', j)
                panel = pd.DataFrame(
                    data={
                        "channel": pd.Series(
                            data=acquisition.channel_names,
                            dtype=pd.StringDtype(),
                        ),
                        "name": pd.Series(
                            data=acquisition.channel_labels,
                            dtype=pd.StringDtype(),
                        ),
                    },
                )
                j += 1
                panels.append(panel)
            i += 1
        #import pdb; pdb.set_trace()
    return panels

In [89]:
mcd_86_A_file = '/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd'
panels = create_panels_from_mcd_file(mcd_86_A_file)

print('check that the panels are the same')
for i in range(1, len(panels)):
    assert panels[i].equals(panels[0]) 
    if not(panels[i].equals(panels[0])) : print('panel ', i, ' is different as panel 0')

slide idx  0
check that the panels are the same


In [92]:
panels[0][panels[0]['channel'] == 'Yb172']

Unnamed: 0,channel,name
96,Yb172,CD31_1859((3370))Yb172


In [19]:
def extract_tiff_info(mcd_file):
    """
    Extracts information from an MCD file, including the number of TIFF images,
    the number of channels per image, and the channel names.

    Args:
        mcd_file (str or Path): Path to the MCD file.

    Returns:
        tuple: (num_tiff_images, channels_per_image, channel_names)
    """
    mcd_file = Path(mcd_file)
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        channel_names = first_acq.channel_names
        
    return num_tiff_images, channels_per_image, channel_names

In [20]:
extract_tiff_info('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd')

(95,
 134,
 ['As75',
  'Se76',
  'Se77',
  'Se78',
  'ArAr80',
  'Br81',
  'Kr82',
  'Kr83',
  'Sr84',
  'Rb85',
  'Sr86',
  'Sr87',
  'Sr88',
  'Y89',
  'Zr90',
  'Zr91',
  'Zr92',
  'Nb93',
  'Mo94',
  'Mo95',
  'Mo96',
  'Mo97',
  'Mo98',
  'Ru99',
  'Ru100',
  'Ru101',
  'Ru102',
  'Rh103',
  'Pd104',
  'Pd105',
  'Pd106',
  'Ag107',
  'Cd108',
  'Ag109',
  'Cd110',
  'Cd111',
  'Cd112',
  'In113',
  'Cd114',
  'In115',
  'Sn116',
  'Sn117',
  'Sn118',
  'Sn119',
  'Sn120',
  'Sb121',
  'Te122',
  'Te123',
  'Te124',
  'Te125',
  'Te126',
  'I127',
  'Xe128',
  'Xe129',
  'Xe130',
  'Xe131',
  'Xe132',
  'Cs133',
  'Ba134',
  'Ba135',
  'Ba136',
  'Ba137',
  'Ba138',
  'La139',
  'Ce140',
  'Pr141',
  'Nd142',
  'Nd143',
  'Nd144',
  'Nd145',
  'Nd146',
  'Sm147',
  'Nd148',
  'Sm149',
  'Nd150',
  'Eu151',
  'Sm152',
  'Eu153',
  'Sm154',
  'Gd155',
  'Gd156',
  'Gd157',
  'Gd158',
  'Tb159',
  'Gd160',
  'Dy161',
  'Dy162',
  'Dy163',
  'Dy164',
  'Ho165',
  'Er166',
  'Er167',
 

In [None]:
import tifffile
from pathlib import Path
from typing import List
import numpy as np

def extract_tiff_metadata(mcd_file, output_dir, raw_panel_path):

    mcd_file = Path(mcd_file)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    tiff_files = []
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        channel_names = first_acq.channel_names
        
        # Process each acquisition (similar to the try_preprocess_images_from_disk method)
        for slide_idx, slide in enumerate(mcd.slides):

            for acq_idx, acquisition in enumerate(slide.acquisitions):
                print('acq_idx ', acq_idx)  

                raw_panel = pd.read_csv('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/raw_panel.csv')

                # channels to keep 
                panel_channels_to_keep = raw_panel[raw_panel['full'] == 1]

                # Read the acquisition data, which includes all channels
                img_all_channels = mcd.read_acquisition(acquisition, strict=False)

                # mask acquisition layers and channels to keep , MASK RELATED TO IMAGE (mcd.read_acquisition(acquisition, strict=False)) ORDER 
                mask = [1 if name in panel_channels_to_keep['Metal Tag'].values else 0 for name in acquisition.channel_names]

                import pdb; pdb.set_trace()

                # verify the kept channels are 43, accordin to the proteins in the panel
                assert np.sum(mask) == 43 and np.sum(mask) == len(panel_channels_to_keep), 'number of channels to keep is not 43'

                # filtered image with the 43 relevant protein channels 
                img_filtered_channels = img_all_channels[np.array(mask, dtype=bool), :, :]

                # mapping channels to protein 
                filtered_channel_protein_metal_tag = [element for element, m in zip(acquisition.channel_names, mask) if m == 1]

                metal_to_target_mapping = dict(zip(raw_panel['Metal Tag'], raw_panel['Target']))
                mapped_targets = {metal: metal_to_target_mapping.get(metal, "No Target Found") for metal in filtered_channel_protein_metal_tag}
                mapped_df = pd.DataFrame(list(mapped_targets.items()), columns=['Metal Tag', 'Target'])

                # metedata on protein name preparation
                metadata = {f"Layer {i+1}": {"Target": mapped_df['Target'][i]} for i in range(len(mapped_df))}
                # Convert metadata to a string format compatible with TIFF files
                metadata_str = "\n".join([f"Layer {i+1}: {mapped_df['Target'][i]}" for i in range(len(mapped_df))])


                pattern = r'(\d+[A-Z]?)\b'
                #match = re.search(pattern, str(mcd_file))
                # TO DO use re 
                TMA_cell_pattern = str(mcd_file)[-8:-4]

                # save ndarray as a multi-page TIFF file with metadata
                imwrite(output_dir / f'{TMA_cell_pattern}_{acq_idx}.tiff', img_filtered_channels.astype(np.float32), metadata={'ImageDescription': metadata_str})

                import pdb; pdb.set_trace()




In [85]:
raw_panel_path = '/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/raw_panel.csv'

extract_tiff_metadata('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd', '', raw_panel_path)

acq_idx  0
> [0;32m/tmp/ipykernel_633065/2896277543.py[0m(32)[0;36mextract_tiff_metadata[0;34m()[0m
[0;32m     30 [0;31m        [0;32mfor[0m [0mslide_idx[0m[0;34m,[0m [0mslide[0m [0;32min[0m [0menumerate[0m[0;34m([0m[0mmcd[0m[0;34m.[0m[0mslides[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     31 [0;31m[0;34m[0m[0m
[0m[0;32m---> 32 [0;31m            [0;32mfor[0m [0macq_idx[0m[0;34m,[0m [0macquisition[0m [0;32min[0m [0menumerate[0m[0;34m([0m[0mslide[0m[0;34m.[0m[0macquisitions[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     33 [0;31m                [0mprint[0m[0;34m([0m[0;34m'acq_idx '[0m[0;34m,[0m [0macq_idx[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     34 [0;31m[0;34m[0m[0m
[0m


In [87]:
from skimage import io

io.imread('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/02_processed/tiff_imgs/178_B_7.tiff')

array([[[0.       , 0.       , 0.       , ..., 0.       , 1.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        ...,
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ]],

       [[0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        ...,
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
         0.       ],
        [0.       , 0.       , 0.       , ..., 0.       , 0.       ,
   

In [93]:
import tifffile

# Load the TIFF file
tiff_file_path = "86_A_0.tiff"
with tifffile.TiffFile(tiff_file_path) as tif:
    # Read the image data (the actual 3D array)
    image_data = tif.asarray()
    
    # List all available tags for the first page (the metadata might be in one of these)
    print("Available tags in the TIFF file:")
    for tag in tif.pages[0].tags.values():
        print(tag.name)
    
    # Extract the metadata if it's available
    if 'ImageDescription' in tif.pages[0].tags:
        description = tif.pages[0].tags['ImageDescription'].value
        print("\nMetadata (ImageDescription) from TIFF file:")
        print(description)
    else:
        print("\n'Contain Description' tag not found in this TIFF file.")
    
# You can also manipulate the image data
print(f"\nShape of the image: {image_data.shape}")


Available tags in the TIFF file:
ImageWidth
ImageLength
BitsPerSample
Compression
PhotometricInterpretation
ImageDescription
StripOffsets
SamplesPerPixel
RowsPerStrip
StripByteCounts
XResolution
YResolution
ResolutionUnit
Software
SampleFormat

Metadata (ImageDescription) from TIFF file:
{"ImageDescription": "Layer 1: Myeloperoxidase MPO\nLayer 2: FSP1 / S100A4\nLayer 3: SMA\nLayer 4: Histone H3\nLayer 5: fap\nLayer 6: HLA-DR\nLayer 7: CD146\nLayer 8: Cadherin-11\nLayer 9: Carbonic Anhydrase IX\nLayer 10: Fibronectin\nLayer 11: VCAM1\nLayer 12: CD20\nLayer 13: CD68\nLayer 14: Indoleamine 2- 3-dioxygenase (IDO)\nLayer 15: CD3\nLayer 16: Podoplanin\nLayer 17: MMP11\nLayer 18: CD279 (PD-1)\nLayer 19: CD73\nLayer 20: MMP9\nLayer 21: p75 (CD271)\nLayer 22: TCF1/TCF7\nLayer 23: CD10\nLayer 24: Vimentin\nLayer 25: FOXP3\nLayer 26: CD45RO\nLayer 27: PNAd\nLayer 28: CD8a\nLayer 29: CD248 / Endosialin\nLayer 30: LYVE-1\nLayer 31: CD140b (PDGF Receptor beta)\nLayer 32: CD34\nLayer 33: CD4\nLayer 