# Tutorial 2: How to get all segmentations from one cell line

The quilt data package (hipsc_single_cell_image_dataset) contains 216062 single cells segmented from 18186 fields of view (FOVs), with selected features calculated for each cell. In this tutorial, we will show **how to get all the segmentations and raw images from one cell line**, which can be used for image analysis research on one specific structure or for analyzing one specific cell line in a customized way.

In [None]:
####### FOR GOOGLE COLAB USERS ONLY ##########
### install necessary packages if in colab ###
##############################################

############################################################
### make sure to restart runtime after running this step ###
############################################################
def run_subprocess_command(cmd):
    """Run a command and echo its standard output line by line.

    Args:
        cmd: the command line as a single string. It is split on
            whitespace, so arguments that contain spaces are not supported.
    """
    # Using Popen as a context manager closes the stdout pipe and waits for
    # the child process to exit once all of its output has been printed.
    with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as process:
        for line in process.stdout:
            print(line.decode().strip())

import sys, subprocess

# True only when the "google.colab" module is already loaded in this
# interpreter, which is used here as the "running on Colab" check.
IN_COLAB = "google.colab" in sys.modules

# pip commands to run, in this order, when on Colab
colab_requirements = [
    "pip install urllib3==1.25.4",
    "pip install PyYAML==5.1",
    "pip install quilt3",
    "pip install aicsimageio",
]

# outside Colab nothing is installed
for install_cmd in (colab_requirements if IN_COLAB else []):
    run_subprocess_command(install_cmd)

In [None]:
import pandas as pd
import quilt3
from pathlib import Path
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter

## Step 1: connect to the data storage

In [None]:
# open the dataset package hosted in the Allen Cell quilt registry
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset",
    registry="s3://allencell",
)

# calling the "metadata.csv" entry loads it into memory
# (used below as a pandas DataFrame)
meta_df = pkg["metadata.csv"]()

In [None]:
# take a quick look at which columns the metadata table provides
column_names = meta_df.columns
print(column_names)

## Step 2: select one cell line

In [None]:
# We use the lamin B1 cell line as the example (structure_name == 'LMNB1').
# The table has one row per cell, so it is collapsed to one row per FOV
# (first row of each FOVId kept) and re-indexed from 0.
# The steps are chained without inplace=True: in-place edits on the result
# of query() can trigger pandas' SettingWithCopyWarning.
meta_df_lamin = (
    meta_df.query("structure_name=='LMNB1'")
    .drop_duplicates(subset="FOVId")
    .reset_index(drop=True)
)

## Step 3: download data

### use case 1: download all lamin B1 images and their structure segmentations

In [None]:
# prepare file paths (change save_path to a folder on your machine)
save_path = Path("C:/projects/allen_cell_data/")
raw_path = save_path / "raw_image"
structure_path = save_path / "structure"
seg_path = save_path / "structure_segmentation"

# parents=True also creates save_path and any missing folder above it, so
# the cell does not fail when the parent folders do not exist yet
for out_dir in (raw_path, structure_path, seg_path):
    out_dir.mkdir(parents=True, exist_ok=True)

# download all FOVs or a certain number
num = 5  # or num = meta_df_lamin.shape[0] to download every FOV

# head(num) limits the loop to the first `num` FOVs regardless of how the
# table is indexed
for row in meta_df_lamin.head(num).itertuples():

    # fetch the raw image; fov_path is "<subdir>/<file name>" in the package
    subdir_name, file_name = row.fov_path.split("/")[:2]

    local_fn = raw_path / f"{row.FOVId}_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract the structure channel and save it as a 3D (ZYX) stack
    structure_fn = structure_path / f"{row.FOVId}.tiff"
    reader = AICSImage(local_fn)
    with OmeTiffWriter(structure_fn) as writer:
        writer.save(
            reader.get_image_data("ZYX", C=row.ChannelNumberStruct, S=0, T=0),
            dimension_order="ZYX",
        )

    # fetch structure segmentation
    subdir_name, file_name = row.struct_seg_path.split("/")[:2]

    seg_fn = seg_path / f"{row.FOVId}_segmentation.tiff"
    pkg[subdir_name][file_name].fetch(seg_fn)


### use case 2: download all lamin B1 images and their cell and nuclear segmentations

In [None]:
# prepare file paths (change save_path to a folder on your machine)
save_path = Path("C:/projects/allen_cell_data/")
raw_path = save_path / "raw_image"
dye_path = save_path / "dye"
seg_path = save_path / "fov_segmentation"

# parents=True also creates save_path and any missing folder above it, so
# the cell does not fail when the parent folders do not exist yet
for out_dir in (raw_path, dye_path, seg_path):
    out_dir.mkdir(parents=True, exist_ok=True)

# download all FOVs or a certain number
num = 5  # or num = meta_df_lamin.shape[0] to download every FOV

# head(num) applies the limit; previously `num` was set but never used, so
# every FOV was downloaded
for row in meta_df_lamin.head(num).itertuples():

    # fetch the raw image; fov_path is "<subdir>/<file name>" in the package
    subdir_name, file_name = row.fov_path.split("/")[:2]

    local_fn = raw_path / f"{row.FOVId}_original.tiff"
    pkg[subdir_name][file_name].fetch(local_fn)

    # extract the membrane dye and DNA dye channels
    dye_fn = dye_path / f"{row.FOVId}.tiff"
    reader = AICSImage(local_fn)
    with OmeTiffWriter(dye_fn) as writer:
        writer.save(
            reader.get_image_data(
                "CZYX",
                C=[row.ChannelNumber638, row.ChannelNumber405],
                S=0,
                T=0,
            ),
            # the array above is 4D (two channels), so the writer must be
            # told "CZYX" rather than "ZYX"
            dimension_order="CZYX",
        )

    # fetch fov segmentation
    subdir_name, file_name = row.fov_seg_path.split("/")[:2]

    seg_fn = seg_path / f"{row.FOVId}_segmentation.tiff"
    pkg[subdir_name][file_name].fetch(seg_fn)