# Tutorial 3: How to get one example FOV from each cell line

The quilt data package (hipsc_single_cell_image_dataset) contains 216062 single cells segmented from 18186 fields of view (FOVs), with selected features calculated for each cell. In this tutorial, we will show **how to get one example FOV from each cell line**, which can be used for testing your image processing algorithm on different structures.

In [None]:
######### FOR GOOGLE COLAB USERS ONLY ########
### install necessary packages if in colab ###
##############################################

############################################################
### make sure to restart runtime after running this step ###
############################################################
def run_subprocess_command(cmd):
    """Run a command and echo everything it prints, line by line.

    Args:
        cmd: The command line as one string. It is split on whitespace and
            executed without a shell, so quotes are not interpreted.

    Returns:
        The exit code of the finished process (0 means success).
    """
    # stderr is merged into stdout so that error messages (e.g. from a
    # failing ``pip install``) show up in the cell output instead of vanishing.
    # The ``with`` block closes the pipe and waits for the process to exit.
    with subprocess.Popen(
        cmd.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        for line in process.stdout:
            # errors="replace" keeps odd bytes from aborting the echo loop.
            print(line.decode(errors="replace").strip())

    if process.returncode != 0:
        print(f"Command failed with exit code {process.returncode}: {cmd}")
    return process.returncode

import sys, subprocess

# True when the google.colab module is already loaded in this interpreter.
IN_COLAB = "google.colab" in sys.modules

# pip commands to run, in this order, when the notebook executes in Colab.
colab_requirements = [
    "pip install urllib3==1.25.4",
    "pip install PyYAML==5.1",
    "pip install quilt3",
    "pip install aicsimageio",
]

# Outside Colab nothing is installed: the loop iterates over an empty list.
for pip_command in (colab_requirements if IN_COLAB else []):
    run_subprocess_command(pip_command)

In [None]:
import pandas as pd
import numpy as np
import quilt3
from pathlib import Path
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter

## Step 1: connect to the data storage

In [None]:
# Connect to quilt: open the dataset package from the "allencell" S3 registry.
pkg = quilt3.Package.browse(
    "aics/hipsc_single_cell_image_dataset",
    registry="s3://allencell",
)

# Look up the metadata entry, then call it to load its content as a table.
metadata_entry = pkg["metadata.csv"]
meta_df = metadata_entry()

In [None]:
# Take a quick look at which columns the metadata table provides.
print(meta_df.columns)

## Step 2: find all different cell lines

In [None]:
# Show the distinct values of the "structure_name" column; the same column
# is used as the grouping key when sampling in the next cell.
meta_df["structure_name"].unique()

In [None]:
# Number of rows to draw at random for every structure.
num_sample = 1

# GroupBy.sample draws the rows per group and keeps every column, including
# "structure_name", which the download step reads later. Routing
# DataFrame.sample through groupby.apply relies on the grouping column being
# handed to the applied function, which recent pandas versions deprecate.
# Sampling raises if a group has fewer than num_sample rows.
data = (
    meta_df.groupby("structure_name")
    .sample(n=num_sample)
    .reset_index(drop=True)  # discard the original row labels
)

In [None]:
# Inspect the (rows, columns) size of the sampled table.
data.shape

## Step 3: download data

In [None]:
# prepare file paths
# NOTE: change this location to a folder that suits your own machine.
save_path = Path("C:/projects/allen_cell_data/")
# parents=True also creates any missing intermediate folders, so the cell
# does not fail with FileNotFoundError when the parent folder is absent.
save_path.mkdir(parents=True, exist_ok=True)

# Sub-folder that receives the downloaded raw images.
raw_path = save_path / "raw_image"
raw_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Fetch the FOV image referenced by every sampled row into raw_path.
for row in data.itertuples():
    # The first two "/"-separated parts of fov_path are used as the package
    # sub-directory and the file name inside it.
    fov_path_parts = row.fov_path.split("/")
    subdir_name, file_name = fov_path_parts[0], fov_path_parts[1]

    # The local file name records the structure, the FOV id and the structure channel.
    local_fn = raw_path / f"{row.structure_name}_{row.FOVId}_ch_{row.ChannelNumberStruct}_original.tiff"

    package_entry = pkg[subdir_name][file_name]
    package_entry.fetch(local_fn)