In [2]:
import os
import re
root = "/broad/macosko/pipelines"

def _slurm_mem_to_gb(mem):
    """Convert a Slurm memory string such as "952G", "32000M" or "2T" to gigabytes.

    A bare number is interpreted as megabytes, which is Slurm's default unit.
    """
    factors = {"K": 1 / 1024 / 1024, "M": 1 / 1024, "G": 1, "T": 1024}
    text = str(mem).strip().upper()
    if text and text[-1] in factors:
        return float(text[:-1]) * factors[text[-1]]
    return float(text) / 1024

def podman_sbatch_wrapper(command, logpath, jobname, mem, cpus=8, time="24:00:00"):
    """Build (but do not run) an sbatch command that executes `command` in the pipeline image.

    Args:
        command: shell command passed, single-quoted, as the argument to `pipeline-image`.
            It must not contain quote characters because it is nested inside both
            single quotes (podman) and double quotes (sbatch --wrap).
        logpath: file given to sbatch `-o`.
        jobname: Slurm job name (`-J`).
        mem: Slurm memory request such as "64G" (`--mem`).
        cpus: CPU count (`-c`).
        time: wall-clock limit (`-t`).

    Returns:
        The full `sbatch ... --wrap "podman run ..."` command line as a string.

    Raises:
        AssertionError: if `command` contains a single or double quote.
    """
    assert "'" not in command and '"' not in command, "command must not contain quote characters"

    podman_command = f"podman run --rm --init --pull never -v {root}:{root}:rw pipeline-image '{command}'"
    # Join the pieces with single spaces so the printed command has no padding runs.
    sbatch_params = " ".join([
        f"-C container -o {logpath} -J {jobname}",
        f"--mem {mem} -c {cpus} -t {time}",
        "--mail-user macosko-pipelines@broadinstitute.org",
        "--mail-type END,FAIL,REQUEUE,INVALID_DEPEND,STAGE_OUT,TIME_LIMIT",
    ])
    # Requests above 500 GB are sent to the hpcx_macosko partition; compare in GB
    # so that a request given in M or T is classified correctly.
    if _slurm_mem_to_gb(mem) > 500:
        sbatch_params = "--partition=hpcx_macosko " + sbatch_params

    return f'sbatch {sbatch_params} --wrap "{podman_command}"'

In [3]:
def compute_fastq_size(bcl, regex, mult, fastq_root="/broad/macosko/pipelines/fastqs"):
    """Return a Slurm memory string sized from the FASTQs of one flowcell that match `regex`.

    Args:
        bcl: flowcell directory name under `fastq_root`.
        regex: pattern searched (re.search) in each FASTQ *file name*. Matching the
            file name rather than the full path keeps a pattern from accidentally
            matching the flowcell or directory name and selecting every file.
        mult: multiplier applied to the total FASTQ size in GiB.
        fastq_root: directory holding one sub-directory per flowcell.

    Returns:
        "<N>G" where N is max(16, round(total_GiB * mult)).

    Raises:
        AssertionError: if the flowcell directory is missing or fewer than two
            matching `.fastq.gz` files are found.
    """
    fastq_path = f'{fastq_root}/{bcl}'
    assert os.path.isdir(fastq_path), f"FASTQ directory not found: {fastq_path}"
    pattern = re.compile(regex)  # compile once instead of once per file
    fastqs = [os.path.join(fastq_path, fname)
              for fname in sorted(os.listdir(fastq_path))
              if fname.endswith(".fastq.gz") and pattern.search(fname)]
    assert len(fastqs) >= 2, f"expected at least 2 FASTQs matching {regex!r} in {fastq_path}, found {len(fastqs)}"
    fastq_size_bytes = sum(os.path.getsize(fastq) for fastq in fastqs)
    mult_size_gb = round(fastq_size_bytes/1024/1024/1024 * mult)
    return f"{max(16,mult_size_gb)}G"

### Pipeline methods

In [4]:
# TODO: add support for variable lanes (--lanes)
# https://www.10xgenomics.com/support/software/cell-ranger/latest/resources/cr-command-line-arguments
def cellranger_count(bcl, index, transcriptome, chemistry="auto"):
    """Print the cellranger-count.sh command line for one sample index.

    The flowcell must exist under the fastqs directory and the transcriptome under
    the references directory. If an output directory for (bcl, index) already
    exists, the `rm -rf` command needed to clear it is printed and an
    AssertionError is raised. Nothing is executed and nothing is returned.
    """
    known_flowcells = os.listdir("/broad/macosko/pipelines/fastqs")
    assert bcl in known_flowcells
    known_references = os.listdir("/broad/macosko/pipelines/references")
    assert transcriptome in known_references

    existing_output = f'/broad/macosko/pipelines/cellranger-count/{bcl}/{index}'
    if os.path.isdir(existing_output):
        # Refuse to overwrite; tell the user how to clear the previous run.
        print("Output already exists, run this command:",
              f"rm -rf {existing_output}",
              sep="\n")
        assert False

    script = "/broad/macosko/pipelines/scripts/cellranger-count.sh"
    print(f"{script} {bcl} {index} {transcriptome} {chemistry}")

In [5]:
def recon_count(bcl, index, lane=0, bc1=0, bc2=0, p=1.0):
    """Build the sbatch command that runs recon-count.jl and then knn.py for one index.

    Args:
        bcl: flowcell directory name; must exist under `{root}/fastqs`.
        index: sample index; used as the start of the FASTQ regex and of the run name.
        lane: 1-8 to restrict to one lane (adds `_L00<lane>_` to the regex), 0 for all lanes.
        bc1, bc2: ints forwarded to recon-count.jl as `-x` / `-y`; values > 0 are
            also appended to the run name.
        p: value in (0, 1] forwarded as `-p`; values < 1 are appended to the run name.

    Returns:
        The sbatch command string from `podman_sbatch_wrapper`. As a side effect the
        output and log directories for the run are created.

    Raises:
        AssertionError: if any argument fails validation.
    """
    assert bcl in os.listdir(f"{root}/fastqs"), f"flowcell not found under {root}/fastqs: {bcl}"
    assert not re.search(r"\s", bcl)
    assert not re.search(r"\s", index)
    assert isinstance(lane, int) and 0 <= lane <= 8 # 0 means all lanes
    assert type(bc1) == type(bc2) == int
    assert 0 < p <= 1

    regex = rf"{index}.*" + (rf"_L00{lane}_.*" if lane > 0 else "")
    name = f"{index}" + (f"-{lane}" if lane > 0 else "") + (f"_p-{p}" if p<1 else "") + (f"_bc1-{bc1}" if bc1 > 0 else "") + (f"_bc2-{bc2}" if bc2 > 0 else "")
    out_dir = f"{root}/recon/{bcl}/{name}"
    log_dir = f"{root}/logs/{bcl}/{name}"

    # Memory request is derived from the size of the matching fastqs
    mem = compute_fastq_size(bcl, regex, 2)

    # Create the sbatch command
    julia_command = f"julia --threads 8 --heap-size-hint={mem} {root}/scripts/recon-count.jl {root}/fastqs/{bcl} {out_dir} -r {regex} -p {p} -x {bc1} -y {bc2}"
    python_command = f"micromamba run python {root}/scripts/knn.py -i {out_dir} -o {out_dir} -n 150 -b 2 -c 8 -k 2"
    # Chain with && so knn.py is skipped, and the job reports failure, when
    # recon-count.jl exits non-zero.
    command = f"{julia_command} && {python_command}"

    cmd = podman_sbatch_wrapper(command,
                                logpath = f"{log_dir}/recon-count.log",
                                jobname = f"recon-count-{bcl}-{name}",
                                mem=mem, cpus=8, time="24:00:00")

    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    return cmd

In [6]:
def recon(bcl, index, lane=0, bc1=0, bc2=0, p=1.0, n_neighbors=45, local_connectivity=1, spread=1, min_dist=0.1, n_epochs=1500):
    """Build the sbatch command that runs recon.py on an existing recon-count output.

    The run name is derived from index/lane/p/bc1/bc2 exactly as in `recon_count`,
    and the directory `{root}/recon/{bcl}/{name}` (which must already exist) is
    both the input and the output. Memory is 25x the size of `matrix.csv.gz`
    in GiB, with a 16G floor. Returns the command string; the output and log
    directories are created as a side effect.
    """
    available_flowcells = os.listdir("/broad/macosko/pipelines/fastqs")
    assert bcl in available_flowcells
    for label in (bcl, index):
        assert not re.search(r"\s", label)
    assert isinstance(lane, int) and 0 <= lane <= 8 # 0 means all lanes
    assert type(bc1) == type(bc2) == int
    assert 0 < p <= 1

    # Optional suffixes are only added when the value differs from its default.
    name_parts = [f"{index}"]
    if lane > 0:
        name_parts.append(f"-{lane}")
    if p < 1:
        name_parts.append(f"_p-{p}")
    if bc1 > 0:
        name_parts.append(f"_bc1-{bc1}")
    if bc2 > 0:
        name_parts.append(f"_bc2-{bc2}")
    name = "".join(name_parts)

    in_dir = out_dir = f"{root}/recon/{bcl}/{name}"
    log_dir = f"{root}/logs/{bcl}/{name}"
    assert os.path.isdir(in_dir)

    matrix_bytes = os.path.getsize(f"{in_dir}/matrix.csv.gz")
    matrix_gb = matrix_bytes/1024/1024/1024
    mem_gb = max(matrix_gb*25, 16)
    mem = f"{round(mem_gb)}G"

    python_command = (
        f"micromamba run python {root}/scripts/recon.py -i {in_dir} -o {out_dir} -c 8 -b 2"
        f" -nn {n_neighbors} -lc {local_connectivity} -ne {n_epochs} -s {spread} -md {min_dist}"
    )
    cmd = podman_sbatch_wrapper(
        python_command,
        logpath=f"{log_dir}/recon.log",
        jobname=f"recon-{bcl}-{name}",
        mem=mem,
        cpus=8,
        time="72:00:00",
    )

    for directory in (out_dir, log_dir):
        os.makedirs(directory, exist_ok=True)
    return cmd

In [7]:
# TODO: Multiple pucks
def spatial_count(bcl, index, puck_dir, p=1.0):
    """Build the sbatch command that runs spatial-count.jl for one index.

    Args:
        bcl: flowcell directory name; must exist under `{root}/fastqs`.
        index: sample index; used as the FASTQ regex (`-r`) and the output name.
        puck_dir: path passed to spatial-count.jl as the puck directory.
        p: value in (0, 1] forwarded as `-p`; values < 1 are appended to the output name.

    Returns:
        The `sbatch ... --wrap "podman run ..."` command string. As a side effect
        the output and log directories for the run are created.

    Raises:
        AssertionError: if an argument fails validation or the resulting command
            contains a quote character (it is nested in single and double quotes).
    """
    assert bcl in os.listdir(f"{root}/fastqs"), f"flowcell not found under {root}/fastqs: {bcl}"
    assert not re.search(r"\s", bcl)
    assert not re.search(r"\s", index)
    assert not re.search(r"\s", puck_dir)
    assert 0 < p <= 1

    # Memory request is derived from the size of the matching fastqs
    mem_size = compute_fastq_size(bcl, index, 1.5)

    # Create the sbatch command
    out_dir = f"{index}" + (f"_p-{p}" if p<1 else "")
    julia_command = f"julia --heap-size-hint={mem_size} {root}/scripts/spatial-count.jl {root}/fastqs/{bcl} {puck_dir} {root}/spatial-count/{bcl}/{out_dir} -r {index} -p {p}"
    # Same guard as podman_sbatch_wrapper: a quote would break the nested quoting below.
    assert "'" not in julia_command and '"' not in julia_command, "arguments must not contain quote characters"
    # Join with single spaces so the printed command has no padding runs.
    sbatch_params = " ".join([
        f"--nodelist=slurm-bits-d[002-005] -C container -o {root}/logs/{bcl}/{out_dir}/spatial-count.log -J spatial-count-{bcl}-{index}",
        f"-c 9 --mem {mem_size} --time 24:00:00",
        "--mail-user macosko-pipelines@broadinstitute.org --mail-type END,FAIL,REQUEUE,INVALID_DEPEND,STAGE_OUT,TIME_LIMIT",
    ])
    podman_command = f"podman run --rm -v {root}:{root} -v /broad/macosko:/broad/macosko:ro pipeline-image '{julia_command}'"
    cmd = f'sbatch {sbatch_params} --wrap "{podman_command}"'

    os.makedirs(f"{root}/spatial-count/{bcl}/{out_dir}", exist_ok=True)
    os.makedirs(f"{root}/logs/{bcl}/{out_dir}", exist_ok=True)
    return cmd

### Runs

In [3]:
# Flowcell 240826_SL-EXF_0202: build the recon command for index D701.
# NOTE(review): the saved output of this cell is "NameError: name 'os' is not defined",
# so it was last run before the import/definition cells; run those first.
bcl = "240826_SL-EXF_0202_B22K77GLT3"
recon(bcl, "D701")
#recon_count(bcl, "D701", bc1=2144886, bc2=4381631)

NameError: name 'os' is not defined

In [20]:
# Flowcell 240808_SL-EXC_0341: print the cellranger-count command for SI-TT-C8.
# NOTE(review): the saved output shows a /broad/macosko/data/discopipeline/scripts/ path,
# which is not what the current cellranger_count prints; the output is stale.
bcl = "240808_SL-EXC_0341_A22CTCMLT4"
cellranger_count(bcl, "SI-TT-C8", "refdata-gex-GRCh38-2024-A")
# COMPLETE

/broad/macosko/data/discopipeline/scripts/cellranger-count.sh 240808_SL-EXC_0341_A22CTCMLT4 SI-TT-C8 refdata-gex-GRCh38-2024-A auto


In [4]:
# Flowcell 240911_SL-EXD_0362: cellranger_count for SI-TT-E1..E8 (call left commented out).
bcl = "240911_SL-EXD_0362_A22FLV2LT4"
#[cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A", chemistry="auto") for index in ["SI-TT-E"+str(i+1) for i in range(8)]];

In [3]:
# Flowcell 240923_SL-EXH_0185: indexes for recon_count (call left commented out).
# D701 is listed as two lane-specific entries (L003 and L005).
bcl = "240923_SL-EXH_0185_B22G7HYLT4"
indexes = ["D701_S1_L003", "D701_S1_L005", "D702", "D703", "D704", "D705", "D706", "D707", "D708", "D709", "D710"]
# [recon_count(bcl, index) for index in indexes];
# PROCESSING

In [2]:
# Flowcell 241002_SL-EXH_0192: cellranger_count / spatial_count calls (left commented out).
# NOTE(review): `puck_dir` is not assigned in this cell; the spatial_count calls
# would rely on a value set elsewhere.
bcl = "241002_SL-EXH_0192_A22NLKLLT3"
# [cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A", chemistry="auto") for index in ["SI-TT-B9", "SI-TT-B10"]];
# spatial_count(bcl, "SI-TT-G3", puck_dir, p=1.0)
# spatial_count(bcl, "SI-TT-G4", puck_dir, p=1.0)

In [10]:
# Flowcell 241029_SL-EXF_0249: paired RNA and spatial-barcode index lists
# (pipeline calls left commented out).
bcl = "241029_SL-EXF_0249_A22J235LT4"
RNA_indexes = ["SI-TT-"+x for x in ["C8","C9","C10","C11","C12","D9","D10","D11"]]
SB_indexes = ["SI-TT-"+x for x in ["A11","A12","H11","H12","E9","E10","E11","E12"]]
# [cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A", chemistry="auto") for index in RNA_indexes];
# [spatial_count(bcl, index, "recon") for index in SB_indexes];

# NOTE(review): `positioning`, `RNA_bcl` and `SB_bcl` are not defined in the cells visible here.
# [positioning(RNA_bcl, RNA_index, SB_bcl, SB_index) for (RNA_index,SB_index) in zip(RNA_indexes,SB_indexes)]


In [14]:
# Flowcell 241105_SL-EXH_0206: recon_count / recon calls (left commented out).
bcl = "241105_SL-EXH_0206_B22TMH7LT3"
# [recon_count(bcl, index) for index in ["D701","D702","D703","D704","D707","D708"]]
# for i in [recon(bcl, "D704", bc1 = 823752, bc2 = 2471256)]:
#     print(i)

In [5]:
# Flowcell 241113_SL-EXH_0210: cellranger_count for SI-TT-H4..H6 and spatial_count for
# SI-TT-H1..H3 against one recon output directory (calls left commented out).
bcl = "241113_SL-EXH_0210_A22TMFJLT3"
# cellranger_count(bcl, "SI-TT-H4", "refdata-gex-GRCh38-2024-A")
# cellranger_count(bcl, "SI-TT-H5", "refdata-gex-GRCh38-2024-A")
# cellranger_count(bcl, "SI-TT-H6", "refdata-gex-GRCh38-2024-A")
# spatial_count(bcl, "SI-TT-H1", "recon/240716_SL-EXG_0167_B22CG3TLT4/D706-3/UMAP_n=45_d=0.1_I=spectral_c=fulltree45_c1=10-3000_c2=10-3000")
# spatial_count(bcl, "SI-TT-H2", "recon/240716_SL-EXG_0167_B22CG3TLT4/D706-3/UMAP_n=45_d=0.1_I=spectral_c=fulltree45_c1=10-3000_c2=10-3000")
# spatial_count(bcl, "SI-TT-H3", "recon/240716_SL-EXG_0167_B22CG3TLT4/D706-3/UMAP_n=45_d=0.1_I=spectral_c=fulltree45_c1=10-3000_c2=10-3000")

In [16]:
# Flowcell 241119_SL-EXJ_0123: recon_count for D702 (call left commented out).
bcl = "241119_SL-EXJ_0123_B22GLY2LT4"
# recon_count(bcl, "D702")

In [13]:
# Flowcell 241202_SL-EXH_0225: recon_count for D701..D712 (call left commented out).
bcl = "241202_SL-EXH_0225_B22J22GLT4"
#[recon_count(bcl, index) for index in [f"D{j+701}" for j in range(12)]];

In [15]:
# Flowcell 241202_SL-EXH_0226: recon_count for D701..D712 (call left commented out).
bcl = "241202_SL-EXH_0226_A22HTHWLT4"
#[recon_count(bcl, index) for index in [f"D{j+701}" for j in range(12)]];

In [2]:
# Flowcell 241213_SL-EXB_0419: cellranger_count for six GEX indexes and spatial_count
# for SI-TT-H7..H12, each with its own puck directory (calls left commented out).
bcl = "241213_SL-EXB_0419_A22V35YLT3"
# [cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A") for index in ["SI-TT-A2", "SI-TT-A3", "SI-TT-A4", "SI-TT-A5", "SI-TT-A7", "SI-TT-E10"]]
# ["SI-TT-H8", "SI-TT-H9", "SI-TT-H10", "SI-TT-H11", "SI-TT-H12", "SI-TT-H7"]
# spatial_count(bcl, "SI-TT-H8", "/broad/macosko/leematth/recon/data/241018_SL-EXA_0332_B22VFMLLT3/D704/1/outputs")
# spatial_count(bcl, "SI-TT-H9", "/broad/macosko/leematth/recon/data/241018_SL-EXA_0332_B22VFMLLT3/D705/1/outputs")
# spatial_count(bcl, "SI-TT-H10", "/broad/macosko/leematth/recon/data/241018_SL-EXA_0332_B22VFMLLT3/D706/1/outputs")
# spatial_count(bcl, "SI-TT-H11", "/broad/macosko/leematth/recon/data/241018_SL-EXA_0332_B22VFMLLT3/D704/1/outputs")
# spatial_count(bcl, "SI-TT-H12", "/broad/macosko/leematth/recon/data/241018_SL-EXA_0332_B22VFMLLT3/D704/1/outputs")
# spatial_count(bcl, "SI-TT-H7", "/broad/macosko/pipelines/recon/240716_SL-EXG_0167_B22CG3TLT4/D703-3/UMAP_n=45_d=0.1_I=spectral_c=fulltree45_c1=10-3000_c2=10-3000")

In [28]:
# Show the last seven entries of the 56-element `indexes` list.
# NOTE(review): depends on kernel state. The saved output matches the list built in the
# 241212_SL-EXM_0010 cell, which appears after this one; run that cell first.
indexes[49:56]

['D0201', 'D0801', 'D0901', 'D1001', 'D0702', 'D0902', 'D1002']

In [50]:
# Flowcell 241212_SL-EXM_0010: 56 samples named D<id7><id5>, each number zero-padded
# to two digits (e.g. (2, 1) -> "D0201").
bcl = "241212_SL-EXM_0010_A22LHKJLT4"
# (id7, id5) pairs, seven per row
ids = [
    (1, 1), (3, 1), (4, 1), (6, 1), (7, 1), (11, 1), (12, 1),
    (1, 2), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (11, 2),
    (1, 3), (2, 3), (3, 3), (4, 3), (5, 3), (6, 3), (7, 3),
    (9, 3), (10, 3), (4, 4), (5, 4), (6, 4), (7, 4), (8, 4),
    (9, 4), (10, 4), (11, 4), (12, 4), (1, 5), (5, 5), (11, 5),
    (1, 6), (4, 6), (5, 6), (6, 6), (2, 7), (5, 7), (10, 7),
    (1, 8), (3, 8), (4, 8), (5, 8), (6, 8), (8, 8), (11, 8),
    (2, 1), (8, 1), (9, 1), (10, 1), (7, 2), (9, 2), (10, 2),
]
indexes = [f"D{id7:02d}{id5:02d}" for id7, id5 in ids]
#[recon_count(bcl, index) for index in indexes]
#[recon(bcl, index) for index in indexes]

In [20]:
# Flowcell 241220_SL-EXM_0018: per-lane index lists (key = lane number 1-8, value = the
# indexes on that lane). The commented code builds one recon command per (lane, index)
# and writes commands from position 28 onward to commands.txt.
bcl = "241220_SL-EXM_0018_A22LHF3LT4"
indexes = {
    1: ["D709", "D711", "D702", "D703", "D705", "D706", "D707"],
    2: ["D708", "D709", "D710", "D703", "D704", "D711", "D702"],
    3: ["D705", "D709", "D702", "D703", "D704", "D701", "D706"],
    4: ["D703", "D704", "D705", "D702", "D707", "D708", "D709"],
    5: ["D710", "D711", "D712", "D701", "D702", "D703", "D704"],
    6: ["D705", "D706", "D707", "D708", "D709", "D710", "D711"],
    7: ["D701", "D704", "D707", "D702", "D703", "D706", "D708"],
    8: ["D703", "D706", "D701", "D702", "D708", "D704", "D707"]
}

# res = [recon(bcl, index, lane) for lane in indexes for index in indexes[lane]]
# with open("commands.txt", "w") as file:
#     file.write("\n".join(res[28:]))

In [19]:
# Flowcell 250124_SL-EXL_0050: the same nine indexes are present on every lane 1-8.
bcl = "250124_SL-EXL_0050_B22KWLVLT4"
# The list literal is evaluated once per lane, so each lane gets its own list object.
indexes = {
    lane: ["D701", "D705", "D706", "D707", "D708", "D709", "D710", "D711", "D712"]
    for lane in range(1, 9)
}
# [recon_count(bcl, index) for index in ["D701", "D705", "D706", "D707", "D708", "D709", "D710", "D711", "D712"]]
# [recon(bcl, index) for index in ["D701", "D705", "D706", "D707", "D708", "D709", "D710", "D712"]]

In [16]:
# Flowcell 250219_SL-EXD_0474: GEX and spatial-barcode indexes plus the puck
# directories available for spatial_count (pipeline calls left commented out).
bcl = "250219_SL-EXD_0474_B22LHNYLT4"
transcriptome = "refdata-gex-GRCh38-2024-A"
RNA_indexes = ["SI-TT-" + well
               for well in ["B11", "B12", "D8", "D9", "D10", "A2", "B1", "B2", "B3", "B4"]]
SB_indexes = ["SI-TT-A11", "SI-TT-A10"]

# sb_7cm = ["SI-TT-B5", "SI-TT-B6", "SI-TT-B7", "SI-TT-B8"]
# sb_hip = ["SI-TT-A3",]
# sb_cap = ["SI-TT-G10", "SI-TT-G11", "SI-TT-G12"]
sb_sn1 = ["SI-TT-A10"]
sb_sn2 = ["SI-TT-A11"]

puck_dir_7cm = "/broad/macosko/pipelines/recon/250124_SL-EXL_0050_B22KWLVLT4/D701_bc1-8400000_bc2-20000000/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne3000"
puck_dir_hip = "/broad/macosko/pipelines/recon/250124_SL-EXL_0050_B22KWLVLT4/D708/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne500"
puck_dir_cap = "/broad/macosko/pipelines/recon/241202_SL-EXH_0225_B22J22GLT4/D704/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne2000"
puck_dir_sn1 = "/broad/macosko/leematth/recon-count/241202_SL-EXH_0226_A22HTHWLT4/D705/UMAP_n=45_d=0.1_c=fulltree45"
puck_dir_sn2 = "/broad/macosko/leematth/recon-count/241202_SL-EXH_0226_A22HTHWLT4/D706/UMAP_n=45_d=0.1_c=fulltree45"

# for i in [cellranger_count(bcl, index, transcriptome, chemistry="auto") for index in RNA_indexes]:
#     print(i)
# for i in [spatial_count(bcl, index, puck_dir_sn2) for index in sb_sn2]:
#     print(i)

In [17]:
# pucks 250221 --> 2 3cm and 1 7cm
# WALKUP-17859
# Print one recon sbatch command per index.
# NOTE(review): the saved output is "NameError: name 'recon' is not defined", so this
# cell was last run before the definition cells; run those first.
bcl = "250221_SL-EXD_0476_B22LJWGLT4"
indexes = ["D701", "D702",  "D705"]
for i in [recon(bcl, index) for index in indexes]:
    print(i)

NameError: name 'recon' is not defined

In [18]:
# Spatial Library (Flex)
# Flowcell 250227_SL-NVH_0956: spatial_count for SI-TT-B9 and SI-TT-B10 against one
# recon puck directory (call left commented out).
bcl = '250227_SL-NVH_0956_AHTGGVDRX5'
puck_dir = '/broad/macosko/pipelines/recon/250124_SL-EXL_0050_B22KWLVLT4/D701_bc1-8400000_bc2-20000000/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne3000'
indexes = ['SI-TT-B9', 'SI-TT-B10']
# for i in [spatial_count(bcl, index, puck_dir) for index in indexes]:
#     print(i)

In [34]:
# Andrew, Juliana, Haoyuan 4 3cm pucks
# WALKUP-17888
# https://docs.google.com/spreadsheets/d/1rgoJvjOl-fiadjF2W7PD9G2zKiZ_jBMfILwmG89C5Yo/edit?gid=1811879660#gid=1811879660
# recon for D701..D704 (call left commented out).
bcl = '250304_SL-EXC_0505_B22LHFFLT4'
indexes = ["D701", "D702", "D703", "D704"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [35]:
# Andrew, Juliana, Haoyuan 4 3cm pucks set 1
# WALKUP-17933
# https://docs.google.com/spreadsheets/d/1rgoJvjOl-fiadjF2W7PD9G2zKiZ_jBMfILwmG89C5Yo/edit?gid=1811879660#gid=1811879660
# recon for D705..D708 (call left commented out).
bcl = "250307_SL-EXD_0489_B22LWTLLT4"
indexes = ["D705", "D706", "D707", "D708"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [37]:
# Andrew, Juliana, Haoyuan 4 3cm pucks set 2
# WALKUP-17933
# https://docs.google.com/spreadsheets/d/1rgoJvjOl-fiadjF2W7PD9G2zKiZ_jBMfILwmG89C5Yo/edit?gid=1811879660#gid=1811879660
# recon for D709..D712 (call left commented out).
bcl = "250307_SL-EXA_0451_B22LWL7LT4"
indexes = ["D709", "D710", "D711", "D712"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [44]:
# WALKUP-17975: 2025-03-14_Recon_3cm_set1
# recon for D705..D708 (call left commented out).
bcl = '250314_SL-EXC_0515_A22LTMCLT4'
indexes = ["D705", "D706", "D707", "D708"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [45]:
# WALKUP-17976: 2025-03-14_Recon_3cm_set2
# recon for D709..D712 (call left commented out).
bcl = '250314_SL-EXC_0514_B22LW7VLT4'
indexes = ["D709", "D710", "D711", "D712"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [13]:
# WALKUP-17974
# Flowcell 250314_SL-EXA_0457: two sample sets (s12, s13), each with four GEX indexes
# and four spatial indexes in plate columns 9-12 (pipeline calls left commented out).
transcriptome = "refdata-gex-GRCh38-2024-A"
bcl = '250314_SL-EXA_0457_B22M3WFLT4'
s12_GEX = [f"SI-TT-F{col}" for col in range(9, 13)]
s13_GEX = [f"SI-TT-G{col}" for col in range(9, 13)]
s12_spatial = [f"SI-TT-A{col}" for col in range(9, 13)]
s13_spatial = [f"SI-TT-H{col}" for col in range(9, 13)]
# s12
# [cellranger_count(bcl, index, transcriptome) for index in s12_GEX]
# s12_puck_dir = '/broad/macosko/pipelines/recon/250219_SL-EXH_0289_A22LK2MLT4/D703/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne2000/'
# for j in [spatial_count(bcl, index, s12_puck_dir) for index in s12_spatial]:
#     print(j)
# s13
# [cellranger_count(bcl, index, transcriptome) for index in s13_GEX]
# s13_puck_dir = '/broad/macosko/pipelines/recon/250219_SL-EXH_0289_A22LK2MLT4/D704_bc1-1100000_bc2-3700000/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne2000/m'
# for j in [spatial_count(bcl, index, s13_puck_dir) for index in s13_spatial]:
#     print(j)
# for x,y in zip(s12_GEX, s12_spatial):
#     print(f'Rscript run-positioning.R /broad/macosko/pipelines/cellranger-count/250314_SL-EXA_0457_B22M3WFLT4/{x}/outs /broad/macosko/pipelines/spatial-count/250314_SL-EXA_0457_B22M3WFLT4/{y} /broad/macosko/pipelines/positioning/250314_SL-EXA_0457_B22M3WFLT4/{y} &&')

In [12]:
# Mehrdad Calico Runs
# Flowcell 250323_SL-EXM_0088. Several alternative `spatial` / `GEX` / `puck_dirs`
# selections are kept as comments; only the uncommented assignments take effect.
transcriptome = "refdata-gex-GRCh38-2024-A"
bcl = '250323_SL-EXM_0088_A22LHG7LT4'
# spatial = ['SI-TT-G7', 'SI-TT-G8', 'SI-TT-G9', 'SI-TT-G10', 'SI-TT-G11', 'SI-TT-G12', 'SI-TT-H3', 'SI-TT-H4', 'SI-TT-H1', 'SI-TT-H2', 'SI-TT-H5', 'SI-TT-H6', 'SI-TT-H7', 'SI-TT-H8', 'SI-TT-H9', 'SI-TT-B4']
# GEX = ['SI-TT-F1', 'SI-TT-F2', 'SI-TT-F3', 'SI-TT-F4',  'SI-TT-F5',  'SI-TT-F6',  'SI-TT-F7', 'SI-TT-F8', 'SI-TT-F9', 'SI-TT-F10',  'SI-TT-F11', 'SI-TT-F12', 'SI-TT-G1', 'SI-TT-G2', 'SI-TT-G3', 'SI-TT-A1', 'SI-TT-G5', 'SI-TT-G6', 'SI-TT-B2', 'SI-TT-B3']
# spatial = ['SI-TT-G10', 'SI-TT-G11', 'SI-TT-H2']
spatial = ['SI-TT-C4']
GEX = ['SI-TT-B1']
puck_dir = '/broad/macosko/pipelines/recon/241212_SL-EXM_0010_A22LHKJLT4/D1102/'
# puck_dirs = [f'/broad/macosko/leematth/recon/241220_SL-EXM_0018_A22LHF3LT4/{x}' for x in ['1','2','3','4A','4B','5','6','6A','7','8','9A','9B','10','11','12','13']]
puck_dirs = [f'/broad/macosko/leematth/recon/241220_SL-EXM_0018_A22LHF3LT4/{x}' for x in ['4A','4B','8']]
# spatial = ['SI-TT-G10', 'SI-TT-G11']
# puck_dirs = [f'/broad/macosko/leematth/recon/241220_SL-EXM_0018_A22LHF3LT4/{x}' for x in ['4A','4B']]
# [cellranger_count(bcl, index, transcriptome) for index in GEX]
# NOTE(review): with the active assignments, `spatial` has 1 entry and `puck_dirs` has 3,
# so the length assertion below would fail if uncommented as-is.
# assert len(puck_dirs) == len(spatial)
# for j in [spatial_count(bcl, index, puck_dir) for index, puck_dir in zip(spatial, puck_dirs)]:
#     print(j)

In [54]:
# WALKUP-18040
# recon for D703, D704 and D706 (call left commented out).
bcl = '250329_SL-EXA_0468_B22NHC7LT4'
indexes = ['D703', 'D704', 'D706']
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)

In [11]:
# WALKUP-18037
# Flowcell 250327_SL-EXB_0517: spatial indexes A11/A12 and their puck directory.
# NOTE(review): every RNA_indexes assignment here is commented out, so the
# cellranger_count line below would use whatever RNA_indexes an earlier cell left behind.
bcl = '250327_SL-EXB_0517_A22VTC3LT3'
# RNA_indexes = ["SI-TT-"+x for x in ["D11","D12","A9","A10"]]
# SB_indexes = ["SI-TT-"+x for x in ["H10","H11","A11","A12"]]
# RNA_indexes = ["SI-TT-"+x for x in ["A9","A10"]]
SB_indexes = ["SI-TT-"+x for x in ["A11","A12"]]
puck_dir = '/broad/macosko/pipelines/recon/250304_SL-EXC_0505_B22LHFFLT4/D704/UMAP_sb2_nn45_md0.1_s1.0_lc1_rs1.0_nsr10_ne1500'
# [cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A", chemistry="auto") for index in RNA_indexes];
# for i in [spatial_count(bcl, index, puck_dir) for index in SB_indexes]:
#     print(i)

In [10]:
# 250323_SL-EXM_0088_A22LHG7LT4 Mehrdad Calico batch #1
# spatial_count for SI-TT-H2 against an aggregated recon directory (call left commented out).
bcl = '250323_SL-EXM_0088_A22LHG7LT4'
SB_indexes = ["SI-TT-H2"]
puck_dir = '/broad/macosko/leematth/recon/aggregates/250323_SL-EXM_0088_A22LHG7LT4/8'
# for i in [spatial_count(bcl, index, puck_dir) for index in SB_indexes]:
#     print(i)

In [13]:
# Margaret MULTI-seq (/broad/macosko/data/flowcells/250512_VL00297_86_AAFKVLCM5/Data/Intensities/BaseCalls/fastqs  )
# Print the cellranger-count command for SI-TT-C12; the command appears as this cell's output.
bcl = '250512_VL00297_86_AAFKVLCM5'
index = 'SI-TT-C12'
transcriptome = 'refdata-gex-GRCh38-2024-A'
cellranger_count(bcl, index, transcriptome, chemistry="auto")

/broad/macosko/pipelines/scripts/cellranger-count.sh 250512_VL00297_86_AAFKVLCM5 SI-TT-C12 refdata-gex-GRCh38-2024-A auto


### Re-Runs

In [15]:
# Re-run, flowcell 241105_SL-EXH_0206: recon_count with explicit bc1/bc2 values
# (calls left commented out).
bcl = "241105_SL-EXH_0206_B22TMH7LT3"
#recon_count(bcl, "D704", bc1=823752, bc2=2471256, p=1.0)
#recon_count(bcl, "D703", bc1=1000000, bc2=2864049, p=1.0)
# recon_count(bcl, "D704", bc1=1000000, bc2=2864049, p=1.0)

In [16]:
# Re-run, flowcell 240923_SL-EXH_0185: recon_count with explicit bc1/bc2 values
# (calls left commented out).
bcl = "240923_SL-EXH_0185_B22G7HYLT4"
#recon_count(bcl, "D704", bc1=333515, bc2=667030)
#recon_count(bcl, "D705", bc1=258400, bc2=516800)

In [17]:
# Re-run, flowcell 241002_SL-EXH_0192: spatial_count at p=0.1 (calls left commented out).
bcl = "241002_SL-EXH_0192_A22NLKLLT3"
puck_dir = "/broad/macosko/leematth/spatial-count/241002_SL-EXH_0192_A22NLKLLT3/pucks"
# [cellranger_count(bcl, index, "refdata-gex-GRCh38-2024-A", chemistry="auto") for index in ["SI-TT-B9", "SI-TT-B10"]];
#spatial_count(bcl, "SI-TT-G3", puck_dir, p=0.1)
#spatial_count(bcl, "SI-TT-G4", puck_dir, p=0.1)

In [10]:
# Flowcell 250527_SL-EXB_0558: build the spatial_count command for SI-TT-E3 at p=1;
# the returned command string is this cell's output. The commented calls are a
# p sweep (0.45 down to 0.05) for SI-TT-A11.
bcl = "250527_SL-EXB_0558_B22Y57YLT4"
puck_dir = "/broad/macosko/pipelines/recon/250320_SL-EXE_0508_A22LHGKLT4/SA3/"
spatial_count(bcl, "SI-TT-E3", puck_dir, p=1)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.45)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.40)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.35)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.30)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.25)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.20)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.15)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.10)
# spatial_count(bcl, "SI-TT-A11", puck_dir, p=0.05)

'sbatch --nodelist=slurm-bits-d[002-005] -C container -o /broad/macosko/pipelines/logs/250527_SL-EXB_0558_B22Y57YLT4/SI-TT-E3/spatial-count.log -J spatial-count-250527_SL-EXB_0558_B22Y57YLT4-SI-TT-E3                       -c 9 --mem 28G --time 24:00:00                       --mail-user macosko-pipelines@broadinstitute.org --mail-type END,FAIL,REQUEUE,INVALID_DEPEND,STAGE_OUT,TIME_LIMIT --wrap "podman run --rm -v /broad/macosko/pipelines:/broad/macosko/pipelines -v /broad/macosko:/broad/macosko:ro pipeline-image \'julia --heap-size-hint=28G /broad/macosko/pipelines/scripts/spatial-count.jl /broad/macosko/pipelines/fastqs/250527_SL-EXB_0558_B22Y57YLT4 /broad/macosko/pipelines/recon/250320_SL-EXE_0508_A22LHGKLT4/SA3/ /broad/macosko/pipelines/spatial-count/250527_SL-EXB_0558_B22Y57YLT4/SI-TT-E3 -r SI-TT-E3 -p 1\'"'

In [31]:
# Re-run, flowcell 241119_SL-EXJ_0123: recon_count for D702 at p=0.5 (call left commented out).
bcl = "241119_SL-EXJ_0123_B22GLY2LT4"
# recon_count(bcl, "D702", p = 0.5, bc1 = 8216770, bc2 = 23000000)

In [29]:
# Re-run, flowcell 241119_SL-EXJ_0123: recon_count for D702 at p=0.25 (call left commented out).
bcl = "241119_SL-EXJ_0123_B22GLY2LT4"
# recon_count(bcl, "D702", p = 0.25, bc1 = 8216770, bc2 = 23000000)

In [5]:
# Re-run, flowcell 241202_SL-EXH_0225: sweep over p values for recon_count / recon
# (calls left commented out).
bcl = "241202_SL-EXH_0225_B22J22GLT4"
ps = [0.6, 0.7, 0.8, 0.9]
# ps = [1]
# for i in [recon_count(bcl, index, p = p) for index in ["D704"] for p in ps]:
#     print(i)
# for i in [recon(bcl, index, p = p) for index in ["D703","D704"] for p in ps]:
#     print(i)

In [7]:
# Re-run, flowcell 241202_SL-EXH_0226: sweep over p values for recon on D701/D702
# (calls left commented out).
bcl = "241202_SL-EXH_0226_A22HTHWLT4"
ps = [0.6, 0.7, 0.8]
# ps = [1]
# for i in [recon(bcl, index, p = p, n_epochs = 1000) for index in ["D701","D702"] for p in ps]:
#     print(i)
# [recon(bcl, index, p = p) for index in ["D701","D702"] for p in ps]

In [8]:
# Re-run, flowcell 250124_SL-EXL_0050: recon for D701 with non-default n_epochs,
# min_dist and spread (call left commented out).
bcl = "250124_SL-EXL_0050_B22KWLVLT4"
# for i in  [recon(bcl, index, bc1 = 8400000, bc2 = 20000000, n_epochs = 1000, min_dist = 0.05, spread = 0.5) for index in ["D701"]]:
#     print(i)

In [9]:
# NOTE(review): the bcl assignment is commented out, so the calls below would use
# whatever `bcl` the kernel already holds.
# bcl = "250124_SL-EXL_0050_B22KWLVLT4"
# The same nine indexes on every lane 1-8; the list literal is evaluated once per
# lane, so each lane gets its own list object.
indexes = {
    lane: ["D701", "D705", "D706", "D707", "D708", "D709", "D710", "D711", "D712"]
    for lane in range(1, 9)
}
# for i in [recon_count(bcl, index, bc1 = 6000000, bc2 = 16000000) for index in ["D701"]]:
#     print(i)
# for i in [recon(bcl, index, n_neighbors = 32) for index in ["D701"]]:
#     print(i)
# for i in [recon(bcl, index, n_epochs = 2000) for index in ["D701"]]:
#     print(i)

In [10]:
# Re-run, flowcell 240923_SL-EXH_0185: recon for all listed indexes (call left commented out).
bcl = "240923_SL-EXH_0185_B22G7HYLT4"
indexes = ["D701_S1_L003", "D701_S1_L005", "D702", "D703", "D704", "D705", "D706", "D707", "D708", "D709", "D710"]
# for i in [recon(bcl, index) for index in indexes]:
#     print(i)
# PROCESSING

In [11]:
# Re-run, flowcell 241105_SL-EXH_0206: recon for six indexes and recon_count for D704
# with explicit bc1/bc2 (calls left commented out).
bcl = "241105_SL-EXH_0206_B22TMH7LT3"
# for i in [recon(bcl, index) for index in ["D701","D702","D703","D704","D707","D708"]]:
#     print(i)
# for i in [recon_count(bcl, index, bc1 = 823752, bc2 = 2471256) for index in ["D704"]]:
#     print(i)

In [12]:
# Re-run, flowcell 241202_SL-EXH_0225: recon for D704 with n_epochs=2000 (call left commented out).
bcl = "241202_SL-EXH_0225_B22J22GLT4"
# for i in [recon(bcl, "D704", n_epochs = 2000)]:
#     print(i)

In [13]:
# Re-run, flowcell 241220_SL-EXM_0018: recon for D703 on lane 5 with bc2=631000
# (call left commented out).
bcl = "241220_SL-EXM_0018_A22LHF3LT4"
# for i in [recon(bcl, 'D703', lane = 5, bc2 = 631000)]:
#     print(i)

In [14]:
# Re-run, flowcell 250219_SL-EXH_0289: recon for D704 with explicit bc1/bc2 and
# n_epochs=2000 (call left commented out).
bcl = "250219_SL-EXH_0289_A22LK2MLT4"
indexes = ["D704"]
# for i in [recon(bcl, index, bc1 = 1100000, bc2 = 3700000, n_epochs = 2000) for index in indexes]:
#     print(i)

In [15]:
# Re-run, flowcell 250219_SL-EXH_0289: recon_count for D701..D704 at each p in `ps`
# (call left commented out).
bcl = "250219_SL-EXH_0289_A22LK2MLT4"
indexes = ["D701", "D702", "D703", "D704"]
ps = [0.6, 0.7, 0.8, 0.9]
# for i in [recon_count(bcl, index, p = p) for index in indexes for p in ps]:
#     print(i)

In [39]:
# WALKUP-17975: 2025-03-14_Recon_3cm_set1
# Re-run of recon with local_connectivity=6 (call left commented out).
bcl = '250314_SL-EXC_0515_A22LTMCLT4'
indexes = ["D705", "D706", "D707", "D708"]
# for i in [recon(bcl, index, local_connectivity = 6) for index in indexes]:
#     print(i)

In [40]:
# WALKUP-17976: 2025-03-14_Recon_3cm_set2
# Re-run of recon_count for D711 with bc2=4400000 (call left commented out).
bcl = '250314_SL-EXC_0514_B22LW7VLT4'
indexes = ["D711"]
# for i in [recon_count(bcl, index, bc2=4400000) for index in indexes]:
#     print(i)

In [None]:
# Flowcell 240923_SL-EXH_0185: recon_count for all listed indexes (call left commented out).
# This cell has no execution count, i.e. it has not been run in the saved session.
bcl = "240923_SL-EXH_0185_B22G7HYLT4"
indexes = ["D701_S1_L003", "D701_S1_L005", "D702", "D703", "D704", "D705", "D706", "D707", "D708", "D709", "D710"]
# [recon_count(bcl, index) for index in indexes];
# PROCESSING

In [49]:
# WALKUP-17974
# Print the cellranger-count command for SI-TT-B1 using the GRCm39 reference.
# NOTE(review): the same WALKUP id is used above for flowcell 250314_SL-EXA_0457,
# while this flowcell is labelled "Mehrdad Calico" elsewhere; confirm the ticket number.
transcriptome = "refdata-gex-GRCm39-2024-A"
bcl = '250323_SL-EXM_0088_A22LHG7LT4'
cellranger_count(bcl, 'SI-TT-B1', transcriptome)

/broad/macosko/pipelines/scripts/cellranger-count.sh 250323_SL-EXM_0088_A22LHG7LT4 SI-TT-B1 refdata-gex-GRCm39-2024-A auto


In [19]:
# BICAN batch 18 downsampling
# recon_count and recon for D701..D704 at p=0.25 (calls left commented out).
bcl = '250408_SL-EXJ_0235_B22V3KGLT4'
ps = [0.25]
# for i in [recon_count(bcl, index, p = p) for index in ["D701","D702","D703","D704"] for p in ps]:
#     print(i)
# for i in [recon(bcl, index, p = p) for index in ["D701","D702","D703","D704"] for p in ps]:
#     print(i)

In [23]:
# WALKUP-18225: 2025-04-25_Recon_7cm
# Print the sbatch command for D701 with explicit bc1/bc2.
# NOTE(review): `recon_count_and_run` is not defined in any cell visible in this
# notebook, so a fresh kernel would raise NameError here; confirm where it is defined.
bcl = '250501_SL-EXK_0144_B22W35FLT4'
bc1 = 5000000
bc2 = 18000000
for i in [recon_count_and_run(bcl, index, bc1 = bc1, bc2 = bc2) for index in ["D701"]]:
    print(i)

sbatch --partition=hpcx_macosko -C container -o /broad/macosko/pipelines/logs/250501_SL-EXK_0144_B22W35FLT4/D701_bc1-5000000_bc2-18000000/recon.log -J recon-250501_SL-EXK_0144_B22W35FLT4-D701_bc1-5000000_bc2-18000000                       --mem 952G -c 8 -t 72:00:00                       --mail-user macosko-pipelines@broadinstitute.org --mail-type END,FAIL,REQUEUE,INVALID_DEPEND,STAGE_OUT,TIME_LIMIT --wrap "podman run --rm --init --pull never -v /broad/macosko/pipelines:/broad/macosko/pipelines:rw pipeline-image 'julia --threads 8 --heap-size-hint=952G /broad/macosko/pipelines/scripts/recon-count.jl /broad/macosko/pipelines/fastqs/250501_SL-EXK_0144_B22W35FLT4 /broad/macosko/pipelines/recon/250501_SL-EXK_0144_B22W35FLT4/D701_bc1-5000000_bc2-18000000 -r D701.* -p 1.0 -x 5000000 -y 18000000 ; micromamba run python /broad/macosko/pipelines/scripts/knn.py -i /broad/macosko/pipelines/recon/250501_SL-EXK_0144_B22W35FLT4/D701_bc1-5000000_bc2-18000000 -o /broad/macosko/pipelines/recon/250501_S

In [1]:
import firecloud.api as fapi

In [4]:
# Call fapi.validate_config for the "cellranger-count" method configuration in the
# given workspace and display the JSON response.
namespace = "testmybroad"          # workspace namespace
workspace = "Macosko-Pipelines"    # workspace name
cnamespace = "macosko-pipelines"   # method-configuration namespace
method = "cellranger-count"        # method-configuration name

fapi.validate_config(namespace, workspace, cnamespace, method).json()

{'extraInputs': [],
 'invalidInputs': {},
 'invalidOutputs': {},
 'methodConfiguration': {'deleted': False,
  'inputs': {'cellranger_count.sample': 'this.sample',
   'cellranger_count.count_output_path': '',
   'cellranger_count.params': '""',
   'cellranger_count.reference': 'this.reference',
   'cellranger_count.docker': '',
   'cellranger_count.bucket': '',
   'cellranger_count.technique': '"cellranger"',
   'cellranger_count.log_output_path': '',
   'cellranger_count.fastq_paths': 'this.fastq_paths',
   'cellranger_count.disk_GiB': '"1480"',
   'cellranger_count.mem_GiB': '"64"',
   'cellranger_count.id': 'this.id'},
  'methodConfigVersion': 138,
  'methodRepoMethod': {'methodName': 'cellranger-count',
   'methodVersion': 26,
   'methodNamespace': 'macosko-pipelines',
   'methodUri': 'agora://macosko-pipelines/cellranger-count/26',
   'sourceRepo': 'agora'},
  'name': 'cellranger-count',
  'namespace': 'macosko-pipelines',
  'outputs': {},
  'prerequisites': {},
  'rootEntityType':