In [45]:
import pandas as pd
import numpy as np  

from readimc import MCDFile, TXTFile

import anndata
import pickle

from imread import imread, imsave

import logging
import re
from os import PathLike
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union
from zipfile import ZipFile

import numpy as np
import pandas as pd
from scipy.ndimage import maximum_filter

from PIL import Image
from PIL.ExifTags import TAGS
import tifffile

In [24]:
def create_panels_from_mcd_file(mcd_file: Union[str, PathLike]) -> List[pd.DataFrame]:
    panels = []
    with MCDFile(mcd_file) as f:
        i = 0
        for slide in f.slides:
            print('slide idx ', i)
            j = 0
            for acquisition in slide.acquisitions:
                print('acquisition idx ', j)
                panel = pd.DataFrame(
                    data={
                        "channel": pd.Series(
                            data=acquisition.channel_names,
                            dtype=pd.StringDtype(),
                        ),
                        "name": pd.Series(
                            data=acquisition.channel_labels,
                            dtype=pd.StringDtype(),
                        ),
                    },
                )
                j += 1
                panels.append(panel)
            i += 1
        #import pdb; pdb.set_trace()
    return panels

In [25]:
mcd_86_A_file = '/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd'

In [26]:
create_panels_from_mcd_file(mcd_86_A_file)

slide idx  0
acquisition idx  0
acquisition idx  1
acquisition idx  2
acquisition idx  3
acquisition idx  4
acquisition idx  5
acquisition idx  6
acquisition idx  7
acquisition idx  8
acquisition idx  9
acquisition idx  10
acquisition idx  11
acquisition idx  12
acquisition idx  13
acquisition idx  14
acquisition idx  15
acquisition idx  16
acquisition idx  17
acquisition idx  18
acquisition idx  19
acquisition idx  20
acquisition idx  21
acquisition idx  22
acquisition idx  23
acquisition idx  24
acquisition idx  25
acquisition idx  26
acquisition idx  27
acquisition idx  28
acquisition idx  29
acquisition idx  30
acquisition idx  31
acquisition idx  32
acquisition idx  33
acquisition idx  34
acquisition idx  35
acquisition idx  36
acquisition idx  37
acquisition idx  38
acquisition idx  39
acquisition idx  40
acquisition idx  41
acquisition idx  42
acquisition idx  43
acquisition idx  44
acquisition idx  45
acquisition idx  46
acquisition idx  47
acquisition idx  48
acquisition idx  

[    channel                   name
 0      As75                   75As
 1      Se76                   76Se
 2      Se77                   77Se
 3      Se78                   78Se
 4    ArAr80                 80ArAr
 ..      ...                    ...
 129   Tl205                  205Tl
 130   Pb206                  206Pb
 131   Pb207                  207Pb
 132   Pb208                  208Pb
 133   Bi209  CD15_627((2997))Bi209
 
 [134 rows x 2 columns],
     channel                   name
 0      As75                   75As
 1      Se76                   76Se
 2      Se77                   77Se
 3      Se78                   78Se
 4    ArAr80                 80ArAr
 ..      ...                    ...
 129   Tl205                  205Tl
 130   Pb206                  206Pb
 131   Pb207                  207Pb
 132   Pb208                  208Pb
 133   Bi209  CD15_627((2997))Bi209
 
 [134 rows x 2 columns],
     channel                   name
 0      As75                   75As
 1      Se

In [32]:
import imctools
import numpy as np
import pandas as pd
from pathlib import Path

def extract_tiff_info(mcd_file):
    """
    Extracts information from an MCD file, including the number of TIFF images
    and the number of channels per image.

    Args:
        mcd_file (str or Path): Path to the MCD file.

    Returns:
        tuple: (num_tiff_images, channels_per_image)
    """
    mcd_file = Path(mcd_file)
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        
    return num_tiff_images, channels_per_image

In [33]:
extract_tiff_info('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd')

(95, 134)

In [34]:
def extract_tiff_info(mcd_file):
    """
    Extracts information from an MCD file, including the number of TIFF images,
    the number of channels per image, and the channel names.

    Args:
        mcd_file (str or Path): Path to the MCD file.

    Returns:
        tuple: (num_tiff_images, channels_per_image, channel_names)
    """
    mcd_file = Path(mcd_file)
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        channel_names = first_acq.channel_names
        
    return num_tiff_images, channels_per_image, channel_names

In [35]:
extract_tiff_info('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd')

(95,
 134,
 ['As75',
  'Se76',
  'Se77',
  'Se78',
  'ArAr80',
  'Br81',
  'Kr82',
  'Kr83',
  'Sr84',
  'Rb85',
  'Sr86',
  'Sr87',
  'Sr88',
  'Y89',
  'Zr90',
  'Zr91',
  'Zr92',
  'Nb93',
  'Mo94',
  'Mo95',
  'Mo96',
  'Mo97',
  'Mo98',
  'Ru99',
  'Ru100',
  'Ru101',
  'Ru102',
  'Rh103',
  'Pd104',
  'Pd105',
  'Pd106',
  'Ag107',
  'Cd108',
  'Ag109',
  'Cd110',
  'Cd111',
  'Cd112',
  'In113',
  'Cd114',
  'In115',
  'Sn116',
  'Sn117',
  'Sn118',
  'Sn119',
  'Sn120',
  'Sb121',
  'Te122',
  'Te123',
  'Te124',
  'Te125',
  'Te126',
  'I127',
  'Xe128',
  'Xe129',
  'Xe130',
  'Xe131',
  'Xe132',
  'Cs133',
  'Ba134',
  'Ba135',
  'Ba136',
  'Ba137',
  'Ba138',
  'La139',
  'Ce140',
  'Pr141',
  'Nd142',
  'Nd143',
  'Nd144',
  'Nd145',
  'Nd146',
  'Sm147',
  'Nd148',
  'Sm149',
  'Nd150',
  'Eu151',
  'Sm152',
  'Eu153',
  'Sm154',
  'Gd155',
  'Gd156',
  'Gd157',
  'Gd158',
  'Tb159',
  'Gd160',
  'Dy161',
  'Dy162',
  'Dy163',
  'Dy164',
  'Ho165',
  'Er166',
  'Er167',
 

In [37]:
import tifffile

def extract_tiff_info(mcd_file, output_dir):
    """
    Extracts information from an MCD file, including the number of TIFF images,
    the number of channels per image, the channel names, and saves TIFF images.

    Args:
        mcd_file (str or Path): Path to the MCD file.
        output_dir (str or Path): Directory to save extracted TIFF images.

    Returns:
        tuple: (num_tiff_images, channels_per_image, channel_names, tiff_files)
    """
    mcd_file = Path(mcd_file)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    tiff_files = []
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        channel_names = first_acq.channel_names
        
        # Save each acquisition as a TIFF file
        for slide_idx, slide in enumerate(mcd.slides):
            for acq_idx, acquisition in enumerate(slide.acquisitions):
                img_data = acquisition.get_image()
                tiff_path = output_dir / f"slide{slide_idx+1}_acq{acq_idx+1}.tiff"
                tifffile.imwrite(tiff_path, img_data)
                tiff_files.append(str(tiff_path))
    
    return num_tiff_images, channels_per_image, channel_names, tiff_files

In [38]:
extract_tiff_info('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd', '')

AttributeError: 'Acquisition' object has no attribute 'get_image'

In [41]:
import tifffile
from pathlib import Path
from typing import List
import numpy as np

def extract_tiff_info(mcd_file, output_dir):
    """
    Extracts information from an MCD file, including the number of TIFF images,
    the number of channels per image, the channel names, and saves TIFF images.

    Args:
        mcd_file (str or Path): Path to the MCD file.
        output_dir (str or Path): Directory to save extracted TIFF images.

    Returns:
        tuple: (num_tiff_images, channels_per_image, channel_names, tiff_files)
    """
    mcd_file = Path(mcd_file)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    if not mcd_file.is_file():
        raise FileNotFoundError(f"MCD file not found: {mcd_file}")
    
    tiff_files = []
    
    # Open the MCD file
    with MCDFile(mcd_file) as mcd:
        num_tiff_images = sum(len(slide.acquisitions) for slide in mcd.slides)
        
        if num_tiff_images == 0:
            raise ValueError("No TIFF images found in the MCD file.")
        
        # Extract number of channels from the first acquisition (assuming all are similar)
        first_acq = mcd.slides[0].acquisitions[0]
        channels_per_image = len(first_acq.channel_names)
        channel_names = first_acq.channel_names
        
        # Process each acquisition (similar to the try_preprocess_images_from_disk method)
        for slide_idx, slide in enumerate(mcd.slides):
            for acq_idx, acquisition in enumerate(slide.acquisitions):
                # Assuming there's a method that provides image data from the acquisition
                # Look for how acquisition objects provide image data in your case.
                # The following is an example based on the second function:
                import pdb; pdb.set_trace()
                img_data = acquisition.get_image_data()  # <-- This might be the correct method
                
                if img_data is None:
                    raise ValueError(f"No image data found for acquisition {acq_idx} in slide {slide_idx}")
                
                tiff_path = output_dir / f"slide{slide_idx+1}_acq{acq_idx+1}.tiff"
                tifffile.imwrite(tiff_path, img_data)
                tiff_files.append(str(tiff_path))
    
    return num_tiff_images, channels_per_image, channel_names, tiff_files

In [42]:
extract_tiff_info('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/mcd/86_A/2020115_LC_NSCLC_TMA_86_A.mcd', '')

> [0;32m/tmp/ipykernel_1395218/1630205988.py[0m(46)[0;36mextract_tiff_info[0;34m()[0m
[0;32m     44 [0;31m                [0;31m# The following is an example based on the second function:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     45 [0;31m                [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 46 [0;31m                [0mimg_data[0m [0;34m=[0m [0macquisition[0m[0;34m.[0m[0mget_image_data[0m[0;34m([0m[0;34m)[0m  [0;31m# <-- This might be the correct method[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     47 [0;31m[0;34m[0m[0m
[0m[0;32m     48 [0;31m                [0;32mif[0m [0mimg_data[0m [0;32mis[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
Acquisition(slide=Slide(id=0, metadata={'ID': '0', 'Description': 'Slide', 'Filename': 'E:\\CyTOF\\Zoidberg\\20210115_LC_NSCLC_TMA_86_A\\2020115_LC_NSCLC_TMA_86_A.mcd', 'SlideType': '"

In [43]:

def extract_metadata(path):
    metadata = pd.DataFrame()
    metadata.index.name = "page"

    with tifffile.TiffFile(path) as tif:
        for page_num, page in enumerate(tif.pages):
            description_tag = page.tags.get("ImageDescription")
            description = description_tag.value if description_tag else ""
            #import pdb; pdb.set_trace()

            metadata.loc[page_num, "ImageDescription"] = description

            match_channels = re.search(r'channels=(\d+)', description)
            metadata.loc[page_num, "channel"] = int(match_channels.group(1)) if match_channels else None

    metadata = metadata.convert_dtypes()

    return metadata



path = '/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/img/20210129_LC_NSCLC_TMA_88_A_073.tiff'
metadata_df = extract_metadata(path)
print(metadata_df)

                                       ImageDescription  channel
page                                                            
0     ImageJ=1.11a
images=43
channels=43
hyperstack=...       43
1                                                           <NA>
2                                                           <NA>
3                                                           <NA>
4                                                           <NA>
5                                                           <NA>
6                                                           <NA>
7                                                           <NA>
8                                                           <NA>
9                                                           <NA>
10                                                          <NA>
11                                                          <NA>
12                                                          <NA>
13                       

In [46]:
image = Image.open('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/img/20210129_LC_NSCLC_TMA_88_A_073.tiff')

exif_data = image.getexif()

for tag_id, value in exif_data.items():
    tag = TAGS.get(tag_id, tag_id)
    print(f"{tag}: {value}")

print(' ------- ')

with tifffile.TiffFile('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/raw/img/20210129_LC_NSCLC_TMA_88_A_073.tiff') as tif:
    for tag in tif.pages[0].tags.values():
        print(f"{tag.name}: {tag.value}")

ImageWidth: 654
ImageLength: 733
BitsPerSample: 32
Compression: 1
PhotometricInterpretation: 1
ResolutionUnit: 1
ImageDescription: ImageJ=1.11a
images=43
channels=43
hyperstack=true
mode=grayscale

StripOffsets: 368
Software: tifffile.py
SampleFormat: 3
SamplesPerPixel: 1
RowsPerStrip: 733
StripByteCounts: 1917528
XResolution: 1.0
YResolution: 1.0
 ------- 
ImageWidth: 654
ImageLength: 733
BitsPerSample: 32
Compression: 1
PhotometricInterpretation: 1
ImageDescription: ImageJ=1.11a
images=43
channels=43
hyperstack=true
mode=grayscale
StripOffsets: (368,)
SamplesPerPixel: 1
RowsPerStrip: 733
StripByteCounts: (1917528,)
XResolution: (1, 1)
YResolution: (1, 1)
ResolutionUnit: 1
Software: tifffile.py
SampleFormat: 3


In [None]:
import anndata2ri
from rpy2.robjects import r
from rpy2.robjects.conversion import localconverter

In [3]:
pd.read_csv('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/Patient_Stratification/patient_groups_k4.csv')

Unnamed: 0.1,Unnamed: 0,metacluster,Patient_ID
0,175_1,1,175_1
1,175_10,2,175_10
2,175_100,3,175_100
3,175_101,3,175_101
4,175_102,1,175_102
...,...,...,...
972,88_534,3,88_534
973,88_535,2,88_535
974,88_536,2,88_536
975,88_537,1,88_537


In [4]:
ad = anndata.read('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/02_processed/sce_objects/sce.h5ad')



In [8]:
len(np.unique(ad.obs['Patient_ID']))

1071

In [5]:
ad.obs.columns

Index(['ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area',
       'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description',
       'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust',
       'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy',
       'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age',
       'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N',
       'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse',
       'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI',
       'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma',
       'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour',
       'Area_mm_Core'],
      dtype='object')

In [12]:
ad.obs['acID']

86_A_1_4          1
86_A_1_7          1
86_A_1_27         1
86_A_1_29         1
86_A_1_31         1
                 ..
178_C_98_2214    98
178_C_99_4       99
178_C_99_1499    99
178_C_99_2444    99
178_C_99_3006    99
Name: acID, Length: 5984454, dtype: int32

In [13]:
ad.obs['Patient_Nr'].nunique()

570

In [14]:
data = ad.obs

In [15]:
data.reset_index('sample_id', drop=False, inplace=True)

KeyError: 'Requested level (sample_id) does not match index name (None)'

# Table of Contents

1. [MCD files](#MCD-files)
2. [SCE anndata](#SCE-anndata)
3. [Masks](#Masks)

In [None]:
panel = pd.read_csv('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/cp_csv/panel.csv')
panel[panel['Target'].notna()]


Unnamed: 0,Tube Number,Metal Tag,Target,Antibody Clone,Stock Concentration,Final Concentration / Dilution,uL to add,800 ul,Clean_Target,full,ilastik
0,2131.0,Bi209,CD15,HI98,500.0,5 ug/mL,1.0,8.0,CD15,1,0
1,2163.0,Dy161,CD10,E5P7S,500.0,5 ug/mL,1.0,8.0,CD10,1,0
2,1076.0,Dy162,Vimentin,EPR3776,150.0,1 ug/mL,0.666667,5.333333,Vimentin,1,0
3,2045.0,Dy163,FOXP3,236A/E7,500.0,5 ug/mL,1.0,8.0,FOXP3,1,0
4,2030.0,Dy164,CD45RA,HI100,500.0,2 ug/mL,0.4,3.2,CD45RA + CD45R0,1,0
5,2302.0,Dy164,CD45RO,UCHL1,500.0,2 ug/mL,0.4,3.2,CD45RO,0,0
6,2125.0,Er166,CD8a,C8/144B,500.0,4 ug/mL,0.8,6.4,CD8a,1,0
7,1964.0,Er167,CD248 / Endosialin,Polyclonal_Proteintech,500.0,8 ug/mL,1.6,12.8,CD248 / Endosialin,1,0
8,2015.0,Er168,LYVE-1,Polyclonal_LYVE-1,500.0,7 ug/mL,1.4,11.2,LYVE-1,1,0
9,2250.0,Er170,CD34,EP373Y,500.0,10 ug/mL,2.0,16.0,CD34,1,0


# MCD files

In [2]:
with MCDFile('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/mcd/mcd/2020120_LC_NSCLC_TMA_86_C.mcd') as f:
    num_slides = len(f.slides)


In [3]:
num_slides

1

In [6]:
pip install imctools

Collecting imctools
  Downloading imctools-2.1.8-py3-none-any.whl.metadata (2.5 kB)
Collecting imagecodecs (from imctools)
  Downloading imagecodecs-2024.12.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting xmltodict>=0.12.0 (from imctools)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting xtiff>=0.7.4 (from imctools)
  Downloading xtiff-0.7.9-py3-none-any.whl.metadata (9.2 kB)
Downloading imctools-2.1.8-py3-none-any.whl (41 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Downloading xtiff-0.7.9-py3-none-any.whl (11 kB)
Downloading imagecodecs-2024.12.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (45.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 MB[0m [31m171.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: xmltodict, imagecodecs, xtiff, imctools
Successfully installed imagecodecs-2024.12.30 imctools-2.1.8 xmltodi

In [8]:
from imctools.io.mcd.mcdparser import McdParser

mcd_file = '/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/mcd/mcd/2020120_LC_NSCLC_TMA_86_C.mcd'
with McdParser(mcd_file) as parser:
    print(dir(parser)) 

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_fh', '_get_ablation_image', '_get_acquisition_raw_data', '_get_buffer', '_get_mcd_xml', '_inject_imc_datafile', '_meta_fh', '_save_ablation_image', '_xml_parser', 'close', 'get_acquisition_data', 'get_after_ablation_image', 'get_before_ablation_image', 'get_mcd_xml', 'get_panorama_image', 'get_slide_image', 'mcd_filename', 'origin', 'save_after_ablation_image', 'save_before_ablation_image', 'save_panorama_image', 'save_slide_image', 'session']


In [48]:
import cv2

with McdParser(mcd_file) as parser:
    slide_img = parser.get_slide_image(0) 

# image in bytes, convert it using cv2
len(slide_img)

cv2.imdecode(np.frombuffer(slide_img, np.uint8), -1).shape

(669, 2002, 4)

In [39]:
with McdParser(mcd_file) as parser:
    parser.__class__() 


TypeError: McdParser.__init__() missing 1 required positional argument: 'filepath'

In [40]:
with McdParser(mcd_file) as parser:
    metadata_xml = parser.get_mcd_xml()
    print(metadata_xml) 

<MCDSchema xmlns="http://www.fluidigm.com/IMC/MCDSchema_V2_0.xsd">
  <Slide>
    <ID>0</ID>
    <Description>Slide</Description>
    <Filename>E:\CyTOF\Zoidberg\20210120_LC_NSCLC_TMA_86_C\2020120_LC_NSCLC_TMA_86_C.mcd</Filename>
    <SlideType>""</SlideType>
    <WidthUm>75000</WidthUm>
    <HeightUm>25000</HeightUm>
    <ImageEndOffset>27007</ImageEndOffset>
    <ImageStartOffset>166</ImageStartOffset>
    <ImageFile>""</ImageFile>
    <EnergyDb>0</EnergyDb>
    <Frequency>200</Frequency>
    <FMarkSlideLength>125000</FMarkSlideLength>
    <FMarkSlideThickness>12000</FMarkSlideThickness>
    <Name>Empty12AF</Name>
    <SwVersion>7.0.8493.0</SwVersion>
  </Slide>
  <Panorama>
    <ID>1</ID>
    <SlideID>0</SlideID>
    <Description>IMG_9656.jpg</Description>
    <SlideY4PosUm>478.972</SlideY4PosUm>
    <SlideX4PosUm>0</SlideX4PosUm>
    <SlideY3PosUm>478.972</SlideY3PosUm>
    <SlideX3PosUm>75000</SlideX3PosUm>
    <SlideY2PosUm>24520.972</SlideY2PosUm>
    <SlideX2PosUm>75000</SlideX2

In [None]:
with MCDFile('/Users/th9353/Documents/data/NSCLC/mcd/2020120_LC_NSCLC_TMA_86_C.mcd') as f:
    # first slide
    slide = f.slides[0]
    print(
        slide.id,
        slide.description,
        slide.width_um,
        slide.height_um,
    )
    # first panorama of first slide
    panorama = slide.panoramas[0]
    print(
        panorama.id,
        panorama.description,
        panorama.width_um,
        panorama.height_um,
    )
    # first acquisition of first slide
    acquisition = slide.acquisitions[0]
    print(
        acquisition.id,
        acquisition.description,
        acquisition.width_um,
        acquisition.height_um,
        acquisition.channel_names,  # metals
        acquisition.channel_labels,  # targets
    )

0 Slide 75000.0 25000.0
1 IMG_9656.jpg 75000.0 24042.0
1 C1,1 728.0 679.0 ['As75', 'Se76', 'Se77', 'Se78', 'ArAr80', 'Br81', 'Kr82', 'Kr83', 'Sr84', 'Rb85', 'Sr86', 'Sr87', 'Sr88', 'Y89', 'Zr90', 'Zr91', 'Zr92', 'Nb93', 'Mo94', 'Mo95', 'Mo96', 'Mo97', 'Mo98', 'Ru99', 'Ru100', 'Ru101', 'Ru102', 'Rh103', 'Pd104', 'Pd105', 'Pd106', 'Ag107', 'Cd108', 'Ag109', 'Cd110', 'Cd111', 'Cd112', 'In113', 'Cd114', 'In115', 'Sn116', 'Sn117', 'Sn118', 'Sn119', 'Sn120', 'Sb121', 'Te122', 'Te123', 'Te124', 'Te125', 'Te126', 'I127', 'Xe128', 'Xe129', 'Xe130', 'Xe131', 'Xe132', 'Cs133', 'Ba134', 'Ba135', 'Ba136', 'Ba137', 'Ba138', 'La139', 'Ce140', 'Pr141', 'Nd142', 'Nd143', 'Nd144', 'Nd145', 'Nd146', 'Sm147', 'Nd148', 'Sm149', 'Nd150', 'Eu151', 'Sm152', 'Eu153', 'Sm154', 'Gd155', 'Gd156', 'Gd157', 'Gd158', 'Tb159', 'Gd160', 'Dy161', 'Dy162', 'Dy163', 'Dy164', 'Ho165', 'Er166', 'Er167', 'Er168', 'Tm169', 'Er170', 'Yb171', 'Yb172', 'Yb173', 'Yb174', 'Lu175', 'Yb176', 'Hf177', 'Hf178', 'Hf179', 'Hf180', 'Ta1

In [46]:
with MCDFile('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/mcd/mcd/2020120_LC_NSCLC_TMA_86_C.mcd') as f:
    print(len(f.slides))
    slide = f.slides[0]  
    img = f.read_slide(slide)  

1


In [None]:
img.shape

(669, 2002, 4)

In [None]:
pd.read_csv('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/cp_csv/86_A_acquisition_metadata.csv')



Unnamed: 0,AcSession,ablation_distance_between_shots_x,ablation_distance_between_shots_y,ablation_frequency,ablation_power,description,end_timestamp,has_after_ablation_image,has_before_ablation_image,id,...,roi_end_x_pos_um,roi_end_y_pos_um,roi_start_x_pos_um,roi_start_y_pos_um,segment_data_format,signal_type,slide_id,source_path,start_timestamp,template
0,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A1,1",2021-01-15T14:45:02.246881+01:00,False,False,1,...,38520.246,6443.245,37842.250,7161.536,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-15T14:15:33.149367+01:00,LC_NSCLC_TMA_Study_V1.1
1,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A1,2",2021-01-15T15:13:55.872597+01:00,False,False,2,...,38364.005,7242.464,37660.000,7938.822,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-15T14:45:13.513040+01:00,LC_NSCLC_TMA_Study_V1.1
2,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A1,5",2021-01-15T15:42:36.655189+01:00,False,False,5,...,38411.308,9442.869,37722.299,10141.212,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-15T15:14:08.848729+01:00,LC_NSCLC_TMA_Study_V1.1
3,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A1,6",2021-01-15T16:11:22.320265+01:00,False,False,6,...,38324.513,10235.165,37629.518,10933.518,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-15T15:42:49.916157+01:00,LC_NSCLC_TMA_Study_V1.1
4,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A1,7",2021-01-15T16:37:53.057545+01:00,False,False,7,...,38283.508,11029.163,37610.501,11689.589,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-15T16:11:39.851030+01:00,LC_NSCLC_TMA_Study_V1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A8,9",2021-01-17T07:25:21.424259+01:00,False,False,107,...,32811.716,12176.756,32123.715,12863.127,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-17T06:57:32.804949+01:00,LC_NSCLC_TMA_Study_V1.1
91,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A8,10",2021-01-17T07:52:51.592824+01:00,False,False,108,...,32691.134,13012.050,32007.120,13686.456,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-17T07:25:40.166620+01:00,LC_NSCLC_TMA_Study_V1.1
92,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A8,11",2021-01-17T08:20:33.306570+01:00,False,False,109,...,32619.053,13818.015,31948.047,14511.371,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-17T07:53:05.987441+01:00,LC_NSCLC_TMA_Study_V1.1
93,2020115_LC_NSCLC_TMA_86_A,1.0,1.0,400.0,6.0,"A8,12",2021-01-17T08:49:04.418445+01:00,False,False,110,...,32686.243,14543.797,32023.237,15266.083,Float,Dual,0,/tmp/tmp0jt4_t4k/20210115_LC_NSCLC_TMA_86_A/20...,2021-01-17T08:20:47.877951+01:00,LC_NSCLC_TMA_Study_V1.1


In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/86_A_Cells.csv').columns


Index(['ImageNumber', 'ObjectNumber', 'Metadata_FileLocation',
       'Metadata_Frame', 'Metadata_Series',
       'Metadata_ablation_distance_between_shots_x',
       'Metadata_ablation_distance_between_shots_y',
       'Metadata_ablation_frequency', 'Metadata_ablation_power',
       'Metadata_acid',
       ...
       'Neighbors_AngleBetweenNeighbors_20',
       'Neighbors_FirstClosestDistance_20',
       'Neighbors_FirstClosestObjectNumber_20',
       'Neighbors_NumberOfNeighbors_20', 'Neighbors_PercentTouching_20',
       'Neighbors_SecondClosestDistance_20',
       'Neighbors_SecondClosestObjectNumber_20', 'Number_Object_Number',
       'Parent_TumourExpanded', 'Parent_TumourMask'],
      dtype='object', length=158)

In [42]:
pd.read_csv('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/cp_csv/86_A_Image.csv')

Unnamed: 0,AreaOccupied_AreaOccupied_TumourMaskBW,AreaOccupied_Perimeter_TumourMaskBW,AreaOccupied_TotalArea_TumourMaskBW,Count_Cells,Count_TumourExpanded,Count_TumourMask,ExecutionTime_01Images,ExecutionTime_02Metadata,ExecutionTime_03NamesAndTypes,ExecutionTime_04Groups,...,Threshold_SumOfEntropies_TumourMaskBW,Threshold_WeightedVariance_TumourMaskBW,URL_CellMask,URL_FullStack,URL_SpillMat,URL_TumourMask,Width_CellMask,Width_FullStack,Width_SpillMat,Width_TumourMask
0,55763.0,2809.343505,487396.0,2414.0,771.0,771.0,0,0,3.432022,0,...,-12.383053,2.341245e-13,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,676,676,43,676
1,94726.0,8182.212188,376852.0,1094.0,771.0,771.0,0,0,4.914032,0,...,-12.407850,1.084668e-12,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,626,626,43,626
2,60753.0,4449.209737,289444.0,868.0,771.0,771.0,0,0,4.711230,0,...,-12.435763,6.091714e-13,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,538,538,43,538
3,340458.0,7551.599923,491040.0,3118.0,771.0,771.0,0,0,4.664430,0,...,-12.441318,2.592085e-12,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,682,682,43,682
4,240225.0,8753.917816,462351.0,3640.0,771.0,771.0,0,0,4.742430,0,...,-12.429475,1.890195e-12,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,687,687,43,687
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,219085.0,8618.504648,492768.0,2375.0,771.0,771.0,0,0,2.028013,0,...,-12.431108,1.036098e-12,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,696,696,43,696
91,45903.0,6766.791699,491352.0,2116.0,771.0,771.0,0,0,1.981213,0,...,-12.372687,8.496665e-14,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,694,694,43,694
92,23833.0,3743.080661,502392.0,540.0,771.0,771.0,0,0,1.840812,0,...,-12.330042,2.597328e-13,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,692,692,43,692
93,223989.0,10367.707930,484380.0,4267.0,771.0,771.0,0,0,1.934412,0,...,-12.381223,1.804520e-12,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,file:////172.23.62.240/lenaSamba/lena_processe...,690,690,43,690


In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/Patient Stratification/patient_groups_k4.csv')

Unnamed: 0.1,Unnamed: 0,metacluster,Patient_ID
0,175_1,1,175_1
1,175_10,2,175_10
2,175_100,3,175_100
3,175_101,3,175_101
4,175_102,1,175_102
...,...,...,...
972,88_534,3,88_534
973,88_535,2,88_535
974,88_536,2,88_536
975,88_537,1,88_537


In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/clinical_data_ROI.csv')

Unnamed: 0.1,Unnamed: 0,X,ROI_xy,acID,RoiID,TMA.x,Tma_ac,Patient_Nr,TMA.y,X..spots,...,DFS,Ev.O,OS,Smok,Nikotin,ROI,Patient_ID,LN.Met,Dist.Met,NeoAdj
0,1,1,11,1,"175_A1,1",175A,175A_1,1.0,175.0,1.0,...,,1.0,254.0,3.0,,"A1,1",175_1,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
1,2,2,12,2,"175_A1,2",175A,175A_2,5.0,175.0,9.0,...,3420.0,1.0,3420.0,1.0,20.0,"A1,2",175_5,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
2,3,3,13,3,"175_A1,3",175A,175A_3,9.0,175.0,17.0,...,1312.0,1.0,1312.0,1.0,60.0,"A1,3",175_9,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
3,4,4,14,4,"175_A1,4",175A,175A_4,13.0,175.0,25.0,...,1950.0,1.0,1950.0,3.0,,"A1,4",175_13,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
4,5,5,16,5,"175_A1,6",175A,175A_5,21.0,175.0,41.0,...,820.0,1.0,820.0,3.0,,"A1,6",175_21,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2067,2068,100,811,116,"88_C8,11",88C,88C_116,529.0,88.0,344.0,...,1707.0,0.0,1707.0,1.0,100.0,"C8,11",88_529,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
2068,2069,101,812,117,"88_C8,12",88C,88C_117,533.0,88.0,352.0,...,1652.0,0.0,1652.0,1.0,20.0,"C8,12",88_533,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
2069,2070,102,813,118,"88_C8,13",88C,88C_118,537.0,88.0,360.0,...,189.0,1.0,621.0,2.0,30.0,"C8,13",88_537,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy
2070,2071,103,814,119,"88_C8,14",88C,88C_119,,,,...,,,,,,,Control,,,


In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/clinical_data_ROI.csv').columns


Index(['Unnamed: 0', 'X', 'ROI_xy', 'acID', 'RoiID', 'TMA.x', 'Tma_ac',
       'Patient_Nr', 'TMA.y', 'X..spots', 'DX.name', 'x.y.localisation', 'Age',
       'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N',
       'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse',
       'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI',
       'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj'],
      dtype='object')

In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/clinical_data_ROI.csv')['Stage']

0       5.0
1       1.0
2       2.0
3       1.0
4       2.0
       ... 
2067    2.0
2068    5.0
2069    5.0
2070    NaN
2071    NaN
Name: Stage, Length: 2072, dtype: float64

In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/clinical_data_ROI.csv')['Patient_ID'].value_counts(dropna=False)


Control    91
88_363      3
175_1       2
87_198      2
86_116      2
           ..
86_113      1
86_133      1
86_145      1
86_149      1
87_250      1
Name: Patient_ID, Length: 1071, dtype: int64

In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/clinical_data_ROI.csv')['ROI']

0        A1,1
1        A1,2
2        A1,3
3        A1,4
4        A1,6
        ...  
2067    C8,11
2068    C8,12
2069    C8,13
2070      NaN
2071      NaN
Name: ROI, Length: 2072, dtype: object

In [None]:
pd.read_csv('/Users/th9353/Documents/data/NSCLC/cp_csv/panel.csv')['Target'].value_counts()


Iridium                               2
CD15                                  1
CD20                                  1
Carbonic Anhydrase IX                 1
VCAM1                                 1
CD68                                  1
Histone H3                            1
Ki-67                                 1
Caveolin-1                            1
Collagen I                            1
Fibronectin                           1
CD3                                   1
CD146                                 1
MMP11                                 1
p75 (CD271)                           1
CD140b (PDGF Receptor beta)           1
Myeloperoxidase MPO                   1
CD4                                   1
vWF                                   1
CD31                                  1
anti-Human CXCL12 / SDF-1             1
CCL21 / 6Ckine                        1
Cadherin-11                           1
HLA-DR                                1
CD10                                  1


# SCE anndata

There is one single cell experiment anndata that store all cells of the NSCLC dataset. The anndata is created from the original .sce file from the publication. .h5ad file is generated in R 

In [2]:
adata = anndata.read_h5ad('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/02_processed/sce_objects/sce.h5ad')

print(adata)

AnnData object with n_obs × n_vars = 5984454 × 43
    obs: 'ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description', 'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust', 'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy', 'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age', 'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N', 'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse', 'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI', 'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma', 'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour', 'Area_mm_Core'
    var: 'Tube.Number', 'Metal.Tag', 'Target', 'Antibody.Clone', 'Stock.Concentration', 'Final.Concentration...Dilution', 'uL.to.add', 'X800.ul', 'Clean_Target', 'full', 'ilastik'
    uns: 'X_name'
    layers: 'c_counts', 'c_counts_asin

In [3]:
(adata.obs[(adata.obs['TmaID'] == '88') & 
           (adata.obs['TmaBlock'] == 'C') & 
           (adata.obs['acID'] == 99)])

Unnamed: 0,ImageNumber,CellNumber,Center_X,Center_Y,Area,MajorAxisLength,MinorAxisLength,Compartment,Area_Description,BatchID,...,Patient_ID,LN.Met,Dist.Met,NeoAdj,Area_px_Stroma,Area_px_Tumour,Area_px_Core,Area_mm_Stroma,Area_mm_Tumour,Area_mm_Core
88_C_99_941,104,941,335.178571,240.553571,56,10.676010,7.010708,-29.472705,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_1046,104,1046,335.833333,259.944444,72,11.674582,8.285026,-39.724056,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_1071,104,1071,365.220000,261.040000,50,11.358616,5.905746,-28.584889,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_1119,104,1119,338.222222,269.844444,45,8.926932,7.408896,-40.402246,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_1120,104,1120,377.520000,272.700000,50,13.784577,4.829556,-31.466050,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88_C_99_2564,104,2564,246.272727,487.818182,33,9.352115,4.898254,5.072404,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_2673,104,2673,73.352941,508.235294,34,7.908415,5.716977,-3.764706,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_2686,104,2686,60.909091,509.181818,44,9.601168,6.062586,-2.401454,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683
88_C_99_1290,104,1290,34.771429,298.285714,35,8.294705,5.636000,15.724660,,20210129,...,88_521,LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,67525,326592,375683,0.067525,0.326592,0.375683


In [15]:
filtered_adata = adata[(
    (adata.obs['TmaID'] == '88') & 
    (adata.obs['TmaBlock'] == 'C') & 
    (adata.obs['acID'] == 99)
), :]

filtered_adata

View of AnnData object with n_obs × n_vars = 3497 × 43
    obs: 'ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description', 'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust', 'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy', 'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age', 'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N', 'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse', 'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI', 'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma', 'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour', 'Area_mm_Core'
    var: 'Tube.Number', 'Metal.Tag', 'Target', 'Antibody.Clone', 'Stock.Concentration', 'Final.Concentration...Dilution', 'uL.to.add', 'X800.ul', 'Clean_Target', 'full', 'ilastik'
    uns: 'X_name'
    layers: 'c_counts', 'c_counts

In [18]:
def return_single_adata_sample(all_cells_adata, TmaID, TmaBlock, acID):

    adata_sample = all_cells_adata[(
        (all_cells_adata.obs['TmaID'] == str(TmaID)) & 
        (all_cells_adata.obs['TmaBlock'] == str(TmaBlock)) & 
        (all_cells_adata.obs['acID'] == int(acID))
    ), :]

    return adata_sample



In [19]:
return_single_adata_sample(adata, str(88), str('C'), int(99))

View of AnnData object with n_obs × n_vars = 3497 × 43
    obs: 'ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description', 'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust', 'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy', 'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age', 'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N', 'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse', 'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI', 'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma', 'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour', 'Area_mm_Core'
    var: 'Tube.Number', 'Metal.Tag', 'Target', 'Antibody.Clone', 'Stock.Concentration', 'Final.Concentration...Dilution', 'uL.to.add', 'X800.ul', 'Clean_Target', 'full', 'ilastik'
    uns: 'X_name'
    layers: 'c_counts', 'c_counts

In [20]:
sample_identifier = {'TmaID': '86', 'TmaBlock': 'A', 'acID': '80'}
result = f"{sample_identifier['TmaID']}_{sample_identifier['TmaBlock']}_{sample_identifier['acID']}"
print(result)

86_A_80


In [37]:
adata.obs[(adata.obs['TmaID'] == '86') & 
                      (adata.obs['TmaBlock'] == 'A') & 
                      (adata.obs['acID'] == 1)].sort_values(by='CellNumber', ascending=True)

Unnamed: 0,ImageNumber,CellNumber,Center_X,Center_Y,Area,MajorAxisLength,MinorAxisLength,Compartment,Area_Description,BatchID,...,Patient_ID,LN.Met,Dist.Met,NeoAdj,Area_px_Stroma,Area_px_Tumour,Area_px_Core,Area_mm_Stroma,Area_mm_Tumour,Area_mm_Core
86_A_1_1,1,1,370.000000,7.714286,14,5.096730,3.608716,-33.227805,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_2,1,2,309.505376,18.440860,93,11.314469,10.601507,-12.076441,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_3,1,3,330.083333,21.770833,48,9.105835,7.226436,3.457446,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_4,1,4,337.611111,20.500000,18,6.333792,3.709344,1.154769,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_5,1,5,317.400000,20.200000,5,3.185641,1.879382,-5.957580,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86_A_1_2410,1,2410,265.690476,709.428571,42,8.628360,6.586008,-203.455252,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_2411,1,2411,256.200000,709.900000,10,5.545940,2.566331,-212.812321,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_2412,1,2412,285.028736,714.275862,174,17.294609,13.106453,-185.929264,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235
86_A_1_2413,1,2413,337.533333,709.400000,15,6.659768,3.138561,-134.335939,,2020115,...,86_1,No LN Metastases,No Dist. Metastases,NoNeoAdjuvantTherapy,334043,64252,387235,0.334043,0.064252,0.387235


In [29]:
adata.obs['acID']

86_A_1_4          1
86_A_1_7          1
86_A_1_27         1
86_A_1_29         1
86_A_1_31         1
                 ..
178_C_98_2214    98
178_C_99_4       99
178_C_99_1499    99
178_C_99_2444    99
178_C_99_3006    99
Name: acID, Length: 5984454, dtype: int32

In [11]:
adata.obs.columns

Index(['ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area',
       'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description',
       'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust',
       'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy',
       'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age',
       'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N',
       'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse',
       'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI',
       'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma',
       'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour',
       'Area_mm_Core'],
      dtype='object')

In [40]:
# unique patients IDS
adata.obs['Patient_ID'].unique()

['86_1', '86_37', '86_8', '86_12', '86_16', ..., '178_521', '178_529', '178_439', '178_428', '86_87']
Length: 1071
Categories (1071, object): ['86_1', '86_3', '86_4', '86_5', ..., '178_569', '178_570', '178_571', 'Control']

In [None]:
patients = adata.obs['Patient_ID'].unique()

# Create a dictionary to store AnnData objects for each patient
adata_per_patient = {}

for patient in patients:
    # Filter data for the specific patient
    patient_data = adata[adata.obs['Patient_ID'] == patient]
    
    # Store the new AnnData object in the dictionary
    adata_per_patient[patient] = patient_data

    pickle_filename = f"adata_patient_{patient}.pkl"
    with open(pickle_filename, 'wb') as f:
        pickle.dump(patient_data, f)

In [16]:
adata_per_patient

{'86_1': View of AnnData object with n_obs × n_vars = 1006 × 43
     obs: 'ImageNumber', 'CellNumber', 'Center_X', 'Center_Y', 'Area', 'MajorAxisLength', 'MinorAxisLength', 'Compartment', 'Area_Description', 'BatchID', 'Panel', 'TmaID', 'TmaBlock', 'acID', 'CellID', 'mclust', 'TMA', 'Tma_ac', 'cell_category', 'cell_type', 'cell_subtype', 'ROI_xy', 'RoiID', 'Patient_Nr', 'X..spots', 'DX.name', 'x.y.localisation', 'Age', 'Gender', 'Typ', 'Grade', 'Size', 'Vessel', 'Pleura', 'T.new', 'N', 'M.new', 'Stage', 'R', 'Chemo', 'Radio', 'Chemo3', 'Radio4', 'Relapse', 'Chemo5', 'Radio6', 'DFS', 'Ev.O', 'OS', 'Smok', 'Nikotin', 'ROI', 'Patient_ID', 'LN.Met', 'Dist.Met', 'NeoAdj', 'Area_px_Stroma', 'Area_px_Tumour', 'Area_px_Core', 'Area_mm_Stroma', 'Area_mm_Tumour', 'Area_mm_Core'
     var: 'Tube.Number', 'Metal.Tag', 'Target', 'Antibody.Clone', 'Stock.Concentration', 'Final.Concentration...Dilution', 'uL.to.add', 'X800.ul', 'Clean_Target', 'full', 'ilastik'
     uns: 'X_name'
     layers: 'c_count

# Masks

In [52]:
np.unique(imread('/work/FAC/FBM/DBC/mrapsoma/prometex/data/NSCLC/01_raw/Cell_masks/86_A_mask/2020115_LC_NSCLC_TMA_86_A_s0_a1_ac_ilastik_s2_Probabilitiescells_mask.tiff'))


array([   0,    1,    2, ..., 2412, 2413, 2414], dtype=uint16)