# Single Cell Decompression Training

## Import Libraries

First, we import the necessary libraries.

In [1]:
# Import libraries
import anndata as ad
from pathlib import Path
import errno
import os
# import numpy as np
# import pandas as pd

## Helper Functions

Next, we import some helper functions, including the CISI segmentation training function.

In [5]:
# Helper fncs
import analysis_utils


## CISI
# Import system libraries to configure code directory as module
from os.path import dirname, abspath, join
import sys

# Find code directory relative to our directory.
# The previous dirname('__file__') operated on a string literal (not the
# __file__ variable), so it always evaluated to '' and the path was silently
# resolved against the current working directory. Make that explicit.
THIS_DIR = os.getcwd()
CODE_DIR = abspath(join(THIS_DIR, '..', 'code'))
# Add code directory to systems paths (guarded so that re-running this cell
# does not keep appending duplicate entries)
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)

# Import CISI training fnc.
from train_dictionary_and_compositions import train_U_and_A

## Inputs

In the first part we specify the paths to the input files (.h5ad files created from R SpatialExperiment, TIFF files generated by the steinbock pipeline and panel metadata file) and where the outputs should be stored.

In [6]:
# Root folder of the training dataset; every other path is derived from it
training_data_path = Path('/mnt/bb_dqbm_volume/data/Tonsil_th152')

# Inputs: single-cell .h5ad file, TIFF image folder and panel metadata file
spe_path = training_data_path / 'preprocessed_data/spe.h5ad'
tiffs_path = training_data_path / 'steinbock/img'
panel_path = training_data_path / 'steinbock/panel.csv'

# Output folder for the training results, created (with any missing parents)
# if it is not there yet; an already existing folder is left untouched
out_path = training_data_path / 'training'
out_path.mkdir(parents=True, exist_ok=True)

In [7]:
# Check that the input files/directory exist before any expensive work starts
if not analysis_utils.is_valid_file(spe_path, ['.h5ad']):
    # The single-cell file failed validation: report it as a missing file
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                            spe_path)
if not analysis_utils.is_valid_file(panel_path, ['.csv']):
    # The panel file failed validation: report it as a missing file
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                            panel_path)
if not analysis_utils.is_valid_directory(tiffs_path):
    # If directory is not found or doesn't contain TIFF files, throw an error.
    # The message is built from adjacent string literals: the previous
    # backslash continuation inside the literal embedded the next line's
    # indentation in the message ("does    not contain").
    raise Exception(
        ('Input TIFFS path {0} directory does not exist or does '
         'not contain any valid TIFF files.').format(tiffs_path)
    ) from FileNotFoundError

Next, we read in the input files for training. We use the Tonsil th152 dataset, which consists of 5 ROIs, and read it in twice: once as segmented single cells (after processing by steinbock) and once directly from the hot-pixel-corrected TIFF image files. From this we create one array of cells × channels and one array of pixels × channels, each held in an AnnData object.

In [8]:
# Read in SpatialExperiment converted to anndata by cellconverter in R
# (the .h5ad file whose existence was checked above)
spe = ad.read_h5ad(spe_path)
# Print the AnnData summary (dimensions and annotation fields) as a sanity check
print(spe)

AnnData object with n_obs × n_vars = 162624 × 43
    obs: 'sample_id', 'ObjectNumber', 'area', 'major_axis_length', 'minor_axis_length', 'eccentricity', 'width_px', 'height_px', 'ROI'
    var: 'channel', 'name', 'keep', 'ilastik', 'deepcell', 'Tube.Number', 'Metal', 'use_channel'
    uns: 'X_name'
    layers: 'exprs', 'log_exprs'


In [10]:
# Build an AnnData object from the TIFF images in `tiffs_path`, using the
# panel metadata file at `panel_path`.
# NOTE(review): presumably one observation per pixel, as the printed n_obs
# suggests - confirm in analysis_utils.anndata_from_tiff
images = analysis_utils.anndata_from_tiff(tiffs_path, panel_path)
# Print the AnnData summary as a sanity check
print(images)


AnnData object with n_obs × n_vars = 9250000 × 43
    obs: 'sample_id', 'ObjectNumber', 'ROI'
    var: 'channel', 'name', 'keep', 'ilastik', 'deepcell', 'Tube Number', 'Metal'


In [None]:
# Run CISI training on the segmented single-cell data.
# The second argument is a path below the training output directory; it is
# derived from `out_path` (defined with the other paths) instead of re-spelling
# the 'training' folder, and resolves to the same string as before.
# NOTE(review): `test_set` presumably names the image(s) held out from
# training - confirm in train_U_and_A
train_U_and_A(
    spe,
    os.path.join(out_path, 'segmentation/sce'),
    test_set=('20220520_TsH_th152_cisi1_001',),
)

on 2: 10 measurements                                                                                                   
on 2: 0 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.                                        
       0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
       0. 0.]
on 2: 100 [0.16974001 0.18408223 0.21928707 0.22142798 0.16865808 0.18413018                                                       | ▂▄▆ 2/4 [50%] in 1:35 (0.0/s, eta: 10s) ▄▂▂ 2/4 [50%] in 2:20 (0.0/s, eta: 10s) 
       0.21082242 0.18299919 0.24525927 0.19746585 0.20086209 0.21878983
       0.22484721 0.16608197 0.1892551  0.189547   0.16233854 0.17607533
       0.21071741 0.2153271  0.18732127 0.16899833 0.16367787 0.17052181
       0.20973286 0.19817968 0.17188679 0.19962898 0.20477844 0.16203738
       0.19299765 0.17780573 0.1772823  0.18316254 0.22652478 0.22010189
       0.2502961  0.20353152 0.16847974 0.24847728 0.1756802  0.18096602
       0.1786

on 2: 1200 [0.23362891 0.24163992 0.27546177 0.27392364 0.23431336 0.23455472                                           a: 10s)  2/4 [50%] in 26:18 (0.0/s, eta: 10s) 
       0.23978485 0.24894849 0.24525927 0.25811183 0.24090412 0.23942409
       0.23826665 0.2445304  0.24400442 0.24003167 0.23712476 0.24697534
       0.26674387 0.23999726 0.2554887  0.26113047 0.23543348 0.24087695
       0.24736789 0.2350834  0.23940331 0.2402011  0.24930543 0.24821141
       0.23484393 0.26631842 0.23971731 0.26732103 0.23762775 0.24213809
       0.2502961  0.24470112 0.24808702 0.24847728 0.24985444 0.23515167
       0.25045863 0.23274398 0.24697752 0.23448797 0.24195283 0.2502103
       0.23310116 0.23242897]
on 2: 1300 [0.23362891 0.24163992 0.27546177 0.27392364 0.23431336 0.23455472                                           
       0.23978485 0.24894849 0.24525927 0.25811183 0.24090412 0.23942409
       0.23826665 0.2445304  0.24400442 0.24003167 0.23712476 0.24697534
       0.26674387 0.239997