In [None]:
import sys
sys.path.append('~3D_IMC_paper/Python/mcd_preprocessing/imctools-master')
from imctools.scripts import ometiff2analysis
from imctools.scripts import imc2tiff
from imctools.scripts import convertfolder2imcfolder 
from imctools.scripts import exportacquisitioncsv
from imctools.io import ometiffparser

In [None]:
import pandas as pd
import os
import re
import zipfile

# Important: This notebook is based on imctools v1. Some function names have been changed in imctools v2 and thus this code might not run with newer imctools packages.

### The 3D IMC preprocessing pipeline for multiplexed image analysis

This workflow is based on the standard IMC preprocessing pipeline, which converts mcd files into omeTIFF files and then into single-channel TIFF stacks.
The mcd-to-omeTIFF conversion is identical to the standard IMC pipeline, but a few modifications have been made to the conversion of omeTIFF files into analysis TIFF stacks.
The following convenience modifications have been included:
* The CSV antibody panel file should contain the column 'singleTIFFs', which selects the channels that will be used in the final 3D model. This table should also include only unique channel names, one per row.
* The ImcAcquisition class (imctools v1) has been extended so that all images in the 3D stack have the same size. The maximum omeTIFF image size is determined first, and smaller images are then zero-padded to that size.

* ometiff_2_analysis has been changed so that it no longer produces the supporting panel CSV files, as these are not needed for 3D IMC segmentation (the standard IMC pipeline needs them for CellProfiler).

* The ome2singletiff function has been changed so that output file names no longer contain channel names. This gives uniform file names across models and makes the files easier to access.




### Input folders

In [None]:
# Folders containing the zipped acquisition files for the analysis.
# NOTE: Python's os functions do not expand '~' on their own (os.makedirs would
# create a literal '~' directory in the working directory), so every path in
# this cell is passed through os.path.expanduser.
folders = [os.path.expanduser('~/registration_LVI_test_model/LVI_breast_cancer')]
# name of the zip folder without the extension (without .zip)
model_name = '20190220_lvi03_bc_test02'


# Pattern that all considered file names must match (used with re.match).
# The dot is escaped and the pattern is anchored so that only names ending in
# '.zip' are selected (the unescaped '.*.zip' also matched e.g. 'unzipped.txt').
file_regexp = r'.*\.zip$'

# output for OME tiffs (the trailing separator is kept on purpose)
folder_base = os.path.expanduser('~/registration_LVI_test_model/LVI_breast_cancer/')

# panel
csv_pannel = os.path.expanduser('~/registration_LVI_test_model/LVI_breast_cancer/BreastCancer_LVI03_Test02.csv')

# column of the panel CSV holding the metal tag of each channel
csv_pannel_metal = 'Metal Tag'
# column with channels that work and should be included in the final model
csv_pannel_singleTIFF = 'singleTIFFs'

# full column: Contains the channels that should be quantified/measured in cellprofiler
csv_pannel_full = 'full'

# whether to export one TIFF per selected channel
do_singleTiffs = True

# os.path.join instead of string concatenation, so the result does not depend
# on folder_base ending with a path separator
singleTiffs_folder = os.path.join(folder_base, '3Dstack_single_channel_tiffs')

In [None]:
# Output locations derived from the base folder.
folder_ome = os.path.join(folder_base, 'ometiff')
folder_final_registration = os.path.join(folder_base, '3D_registred_tiffs')
singleTiffs_folder = os.path.join(folder_base, '3Dstack_single_channel_tiffs')

# Path to the full omeTIFFs: the conversion step creates a subfolder (named
# after the model) inside folder_ome when it writes the omeTIFFs.
out_tiff_folder = '{}/{}/'.format(folder_ome, model_name)

suffix_full = '_full'

failed_images = []

# Analysis stacks to generate, one tuple per stack: (CSV_NAME, SUFFIX, ADDSUM)
#   CSV_NAME: name of the column in the CSV to be used
#   SUFFIX:   suffix of the tiff
#   ADDSUM:   bool, should the sum of all channels be added as the first channel?
list_analysis_stacks = [
    (csv_pannel_full, suffix_full, 0),
]


Generate all the folders if necessary

In [None]:
# Create the output folder tree. exist_ok=True makes the cell safe to re-run,
# avoids the check-then-create race of os.path.exists + os.makedirs, and still
# raises if one of the paths exists but is a regular file instead of a folder.
for fol in [folder_base, folder_ome, folder_final_registration, singleTiffs_folder]:
    os.makedirs(fol, exist_ok=True)

# create registration subfolders (for full stack and single channel registration)
full_stack_reg_folder = os.path.join(folder_final_registration, 'IMC_fullStack_registred')
single_channel_reg_folder = os.path.join(folder_final_registration, 'singleChanelRegistration')

for fol in [full_stack_reg_folder, single_channel_reg_folder]:
    os.makedirs(fol, exist_ok=True)



### Convert zipped IMC acquisitions to input format (standard IMC pipeline)

This script works with zipped IMC acquisitions:
Each acquisition session = (1 mcd file) should be zipped in a folder containing:
- The `.mcd` file
- All associated `.txt` file generated during the acquisition of this `.mcd` file -> Don't change any of the filenames!!

Convert mcd containing folders into imc zip folders

In [None]:
%%time
# Convert every archive in the input folders whose name matches file_regexp.
# The conversion is best effort: a failing archive is reported and recorded in
# failed_images, and the remaining archives are still processed.
failed_images = []
re_fn = re.compile(file_regexp)

for fol in folders:
    for fn in os.listdir(fol):
        if not re_fn.match(fn):
            continue
        fn_full = os.path.join(fol, fn)
        print(fn_full)
        try:
            convertfolder2imcfolder.convert_folder2imcfolder(fn_full, out_folder=folder_ome,
                                                             dozip=False)
        except Exception as err:
            # 'except Exception' (instead of a bare except) lets KeyboardInterrupt
            # and SystemExit stop the cell; the error itself is shown so that the
            # failure can be diagnosed.
            failed_images.append(fn_full)
            print('Failed Folder: ' + fn_full + ' (' + repr(err) + ')')

if failed_images:
    print('Number of folders that failed to convert: ' + str(len(failed_images)))

In [None]:
# Export the acquisition metadata table with imctools, passing the omeTIFF folder
# (folder_ome) and the base folder (folder_base); presumably the CSV is written
# into folder_base - confirm against the imctools v1 documentation.
# NOTE: according to the original author, this command fails if the output file
# already exists, so remove the old file before re-running this cell.
exportacquisitioncsv.export_acquisition_csv(folder_ome, folder_base)

### Generate the analysis stacks. The following functions were modified for 3D IMC workflow

In [None]:
import os
import numpy as np
from imctools.io.imcacquisition import ImcAcquisition
from imctools.io.imcacquisitionbase import ImcAcquisitionBase

class ImcAcquisition3D(ImcAcquisition):
    """
    An image acquisition object representing a single acquisition.

    Compared to the imctools v1 parent class, the channel images can be
    zero-padded (background pixels are appended) up to a common target size, so
    that all multichannel tiffs of a 3D stack end up with the same dimensions.
    """

    def __init__(self, other, max_dimensions=None):
        """
        Build the acquisition from an already loaded acquisition object.

        :param other: acquisition object whose data and metadata are reused
        :param max_dimensions: optional sequence of two integers
            [xmax, ymax] = [number of columns, number of rows] giving the target
            image size; None disables the padding
        """
        ImcAcquisitionBase.__init__(self, other.image_ID, other.original_file, other._data,
                                    other._channel_metals, other._channel_labels,
                                    other.original_metadata, other.image_description,
                                    other.origin, other._offset)
        self._max_dimensions = max_dimensions

    def get_img_stack_cyx(self, channel_idxs=None, offset=None):
        """
        Return the data as a list of 2D channel images, padded if requested.

        :param channel_idxs: indices of the channels to return (default: all channels)
        :param offset: offset added to each channel index when reading the data
            (default: the offset stored on the acquisition)
        :return: list with one 2D image per requested channel; when
            max_dimensions was given, each image is zero-padded at the bottom
            and on the right to (ymax rows, xmax columns)
        :raises ValueError: if max_dimensions does not have exactly two entries,
            if a channel image is not 2D, or if an image is larger than the
            requested target size
        """
        if offset is None:
            offset = self._offset

        if channel_idxs is None:
            channel_idxs = range(self.n_channels)

        data = self._data
        max_dimensions = self._max_dimensions

        # no target size: behave exactly like the unpadded parent implementation
        if max_dimensions is None:
            return [data[i + offset] for i in channel_idxs]

        # the target size does not depend on the channel, so validate it once;
        # errors are raised instead of being returned as strings, which callers
        # expecting a list of images would silently mistake for data
        if len(max_dimensions) != 2:
            raise ValueError("Padding should have input as list of two arguments: maximum image size "
                             "in x (columns of matrix) and in y (rows of matrix)")
        xmax, ymax = max_dimensions

        img = []
        for i in channel_idxs:
            img_i = data[i + offset]
            if len(img_i.shape) != 2:
                raise ValueError("It should be a 2D array per one channel")

            pad_rows = ymax - img_i.shape[0]
            pad_cols = xmax - img_i.shape[1]
            if pad_rows < 0 or pad_cols < 0:
                # np.pad rejects negative widths with a far less helpful message
                raise ValueError("Image of shape {} is larger than the target size "
                                 "({} rows, {} columns)".format(img_i.shape, ymax, xmax))

            # append background (zero) pixels below and to the right of the image
            img.append(np.pad(img_i, [(0, pad_rows), (0, pad_cols)], 'constant'))

        return img

In [None]:
#change the function to generate the folder for registration with no csv metadata files

def ometiff_2_analysis_noCSV(filename, outfolder, basename, pannelcsv=None, metalcolumn=None, masscolumn=None, usedcolumn=None,
                       addsum=False, bigtiff=True, sort_channels=True, pixeltype=None, max_dimensions=None):
    """
    Convert an omeTIFF into a padded analysis TIFF stack without writing the panel CSV.

    :param filename: path of the omeTIFF file to convert
    :param outfolder: folder the analysis stack is written to
    :param basename: file name (without extension) of the analysis stack
    :param pannelcsv: optional panel CSV used to select the channels; with more
        than one column, rows with a truthy value in `usedcolumn` are selected,
        otherwise the header and all values of the single column are used as metals
    :param metalcolumn: panel column holding the metal names (used when `masscolumn` is None)
    :param masscolumn: panel column holding the masses (takes precedence over `metalcolumn`)
    :param usedcolumn: panel column flagging the channels to include
    :param addsum: if True, the sum over the channels is prepended as first channel
    :param bigtiff: passed on to the image writer
    :param sort_channels: if True, the selected channels are sorted before writing
    :param pixeltype: dtype passed on to the image writer
    :param max_dimensions: optional [columns, rows] target size for the zero-padding;
        defaults to the notebook-level values [col_max, row_max]
    :return: None
    """
    if max_dimensions is None:
        # fall back on the maxima computed from the omeTIFFs at notebook level
        max_dimensions = [col_max, row_max]

    # read the pannelcsv to find out which channels should be loaded
    selmetals = None
    selmass = None

    outname = os.path.join(outfolder, basename)
    if pannelcsv is not None:
        pannel = pd.read_csv(pannelcsv)
        if pannel.shape[1] > 1:
            selected = pannel[usedcolumn]
            if masscolumn is None:
                selmetals = [str(n) for s, n in zip(selected, pannel[metalcolumn]) if s]
            else:
                selmass = [str(n) for s, n in zip(selected, pannel[masscolumn]) if s]
        else:
            selmetals = [pannel.columns[0]] + pannel.iloc[:, 0].tolist()

    ome = ometiffparser.OmetiffParser(filename)
    imc_img = ImcAcquisition3D(ome.get_imc_acquisition(), max_dimensions=max_dimensions)

    if sort_channels:
        if selmetals is not None:
            def mass_from_met(x):
                # sort key: the digits of the metal name (compared as text), then the name
                return (''.join([m for m in x if m.isdigit()]), x)
            selmetals = sorted(selmetals, key=mass_from_met)
        if selmass is not None:
            selmass = sorted(selmass)

    writer = imc_img.get_image_writer(outname + '.tiff', metals=selmetals, mass=selmass)

    if addsum:
        img_sum = np.sum(writer.img_stack, axis=2)
        img_sum = np.reshape(img_sum, list(img_sum.shape) + [1])
        writer.img_stack = np.append(img_sum, writer.img_stack, axis=2)

    writer.save_image(mode='imagej', bigtiff=bigtiff, dtype=pixeltype)

    # Unlike the original ometiff_2_analysis, no '<outname>.csv' with the channel
    # names is written: it is not needed for the 3D IMC workflow. The list of
    # channel names that only fed that file has been removed with it.

In [None]:
# The original 'ome2singletiff' function generates file names containing the channel name, but it is easier
# to work with file names that contain only the metal => use this modified function

from imctools.io import ometiffparser

def ome2singletiff_noChannel(path_ome, outfolder, pannelcsv=None, metalcolumn=None, usedcolumn=None, basename=None, dtype=None):
    """
    Save the selected planes of an ome stack as single channel TIFFs in a folder.

    The images are zero-padded to the notebook-level target size
    [col_max, row_max], and the output files are named
    '<basename>_<metal>.tiff' (no channel label in the file name).

    :param path_ome: path of the omeTIFF file
    :param outfolder: folder the single channel TIFFs are written to
    :param pannelcsv: optional panel CSV selecting the channels to export; with
        more than one column, rows with a truthy value in `usedcolumn` are
        selected, otherwise the header and all values of the single column are
        used as metals. If None, every channel is exported.
    :param metalcolumn: panel column holding the metal names
    :param usedcolumn: panel column flagging the channels to export
    :param basename: prefix of the output file names; defaults to the omeTIFF
        file name without its extension, followed by '_'
    :param dtype: dtype passed on to the image writer
    :return: None
    """
    # padding into target dimension
    ome = ometiffparser.OmetiffParser(path_ome)
    imc_img = ImcAcquisition3D(ome.get_imc_acquisition(), max_dimensions=[col_max, row_max])

    if basename is None:
        fn_new = os.path.split(path_ome)[1]
        # Remove the extension as a suffix. str.rstrip('.ome.tiff') strips any
        # trailing run of the characters '.', 'o', 'm', 'e', 't', 'i', 'f' and
        # therefore also ate into names such as 'slide_roi.ome.tiff'.
        for ext in ('.ome.tiff', '.ome.tif', '.tiff', '.tif'):
            if fn_new.endswith(ext):
                fn_new = fn_new[:-len(ext)]
                break
        fn_new = fn_new + '_'
    else:
        fn_new = basename

    # None means "no panel given": export every channel
    selmetals = None
    if pannelcsv is not None:
        pannel = pd.read_csv(pannelcsv)
        if pannel.shape[1] > 1:
            selected = pannel[usedcolumn]
            selmetals = set(str(n) for s, n in zip(selected, pannel[metalcolumn]) if s)
        else:
            # single column panel: header and values are the metals
            # (same convention as ometiff_2_analysis_noCSV)
            selmetals = set(str(n) for n in [pannel.columns[0]] + pannel.iloc[:, 0].tolist())

    # The channel label is deliberately not used: file names contain the metal only.
    for _label, metal in zip(imc_img.channel_labels, imc_img.channel_metals):
        if selmetals is not None and metal not in selmetals:
            print('This channel had no antibody or did not work, therefore not converted:', metal)
            continue

        new_path = os.path.join(outfolder, fn_new + '_' + metal)
        writer = imc_img.get_image_writer(new_path + '.tiff', metals=[metal])
        writer.save_image(mode='imagej', dtype=dtype)

In [None]:
# read the omeTIFF files to find out the max dimensions of the 3D slices for downstream padding

from tifffile import imread

# Collect the size of every omeTIFF found in the subfolders of folder_ome.
img_dimy = []
img_dimx = []
for fol in os.listdir(folder_ome):
    sub_fol = os.path.join(folder_ome, fol)
    if not os.path.isdir(sub_fol):
        # a stray file next to the acquisition folders would make os.listdir fail
        continue
    for img_name in os.listdir(sub_fol):
        if not img_name.endswith('.ome.tiff'):
            continue
        print(img_name)
        # separate names for the file name and the pixel data
        img = imread(os.path.join(sub_fol, img_name))

        # axes 1 and 2 are taken as rows (y) and columns (x); this assumes the
        # omeTIFF is read as a (channel, y, x) array - TODO confirm
        img_dimy.append(img.shape[1])
        img_dimx.append(img.shape[2])

if not img_dimy:
    # fail with a clear message instead of "max() arg is an empty sequence"
    raise ValueError('No .ome.tiff files found in the subfolders of ' + folder_ome)

print(max(img_dimy))
print(max(img_dimx))

In [None]:
# Target size used for the zero-padding: the largest number of rows (y) and
# columns (x) collected from the omeTIFF shapes.
row_max, col_max = max(img_dimy), max(img_dimx)

In [None]:
# The function 'ometiff_2_analysis' also generates metadata files such as panorama png files, xml, and csv files,
# so when generating single channel tiffs make sure that only .ome.tiff files are read from the folder.
# In the csv file, the column 'singleTIFFs' selects the channels to export.

# Export one TIFF per selected channel for every omeTIFF of the model.
if do_singleTiffs:
    if not os.path.exists(singleTiffs_folder):
        os.makedirs(singleTiffs_folder)

    # only the omeTIFFs are converted; other files in the folder are skipped
    ome_tiff_names = [f for f in os.listdir(out_tiff_folder) if f.endswith('.ome.tiff')]
    for file in ome_tiff_names:
        print(file)

        # one output folder per omeTIFF, named after the omeTIFF file
        singleTiffs = '{}/{}'.format(singleTiffs_folder, file)
        if not os.path.exists(singleTiffs):
            os.makedirs(singleTiffs)

        ome_file = '{}{}'.format(out_tiff_folder, file)

        # the panel arguments (pannelcsv, usedcolumn, metalcolumn) restrict the
        # export to the channels with antibody staining
        ome2singletiff_noChannel(
            ome_file,
            outfolder=singleTiffs,
            pannelcsv=csv_pannel,
            metalcolumn=csv_pannel_metal,
            usedcolumn=csv_pannel_singleTIFF,
            dtype='uint16',
        )

Notebook finished