In [3]:
import firecloud.api as fapi
import pandas as pd
import csv
import os

# Cloud Storage bucket and the gs:// prefixes derived from it.
bucket = "fc-secure-d99fbd65-eb27-4989-95b4-4cf559aa7d36"
bcl_root = f"gs://{bucket}/bcls"                  # run() passes {bcl_root}/{bcl} as bcl2fastq.bcl
samplesheet_root = f"gs://{bucket}/samplesheets"  # upload() copies single.csv / dual.csv under here
fastqs_root = f"gs://{bucket}/fastqs"             # the FASTQ size check runs `gsutil du` under here

# Index lookup tables, downloaded from the `main` branch of the repository each
# time this cell runs (so their contents can change between runs).
# dict2csv() reads ND by "I7_Index_ID"/"index" and the other three by
# "index_name"/"index(i7)"/"index2_workflow_b(i5)".
indexes_root = "https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/main/tools/indexes/"
NN = pd.read_csv(f"{indexes_root}SI-NN.csv")
NT = pd.read_csv(f"{indexes_root}SI-NT.csv")
TT = pd.read_csv(f"{indexes_root}SI-TT.csv")
ND = pd.read_csv(f"{indexes_root}ND7.csv")

In [4]:
def dict2csv(indexes):
    """Write the ``single.csv`` and ``dual.csv`` samplesheets for one run.

    Every index name is looked up in the module-level tables in this order:
    ``ND`` (one barcode column, written to ``single.csv``), then ``TT``,
    ``NT`` and ``NN`` (two barcode columns, written to ``dual.csv``). The
    first table that contains the name wins.

    Both files are (re)written in the current working directory and always
    start with a ``[Data]`` row followed by a column-header row, so a sheet
    without samples has exactly two lines.

    All names are resolved *before* either file is opened. An unknown name
    therefore leaves any existing ``single.csv`` / ``dual.csv`` untouched
    instead of truncating them to a partial sheet.

    Args:
        indexes: mapping of lane -> list of index names. Lanes are processed
            in sorted order; names within a lane keep their list order.

    Raises:
        IndexError: if a name is not present in any of the four tables.
    """
    single_rows, dual_rows = [], []
    dual_columns = ["index(i7)", "index2_workflow_b(i5)"]
    # (table, name column, barcode columns, destination rows), in lookup priority order.
    lookups = [
        (ND, "I7_Index_ID", ["index"], single_rows),
        (TT, "index_name", dual_columns, dual_rows),
        (NT, "index_name", dual_columns, dual_rows),
        (NN, "index_name", dual_columns, dual_rows),
    ]

    for lane in sorted(indexes.keys()):
        for index in indexes[lane]:
            for table, name_col, seq_cols, rows in lookups:
                match = table.loc[table[name_col] == index, :]
                if not match.empty:
                    # If a name appears more than once, the first matching row is used.
                    rows.append([index, *(match[col].values[0] for col in seq_cols), lane])
                    break
            else:
                raise IndexError(f"ERROR: index {index} not found")

    # Only touch the files once every index has been resolved successfully.
    with open('single.csv', 'w', newline='') as f1, open('dual.csv', 'w', newline='') as f2:
        single = csv.writer(f1)
        single.writerow(["[Data]"])
        single.writerow(["Sample_ID", "index", "Lane"])
        single.writerows(single_rows)

        dual = csv.writer(f2)
        dual.writerow(["[Data]"])
        dual.writerow(["Sample_ID", "index", "index2", "Lane"])
        dual.writerows(dual_rows)
                    
def upload(bcl, samplesheet_root=samplesheet_root):
    if sum(1 for _ in open("single.csv")) > 2:
        !gsutil cp single.csv {samplesheet_root}/{bcl}/single.csv
    if sum(1 for _ in open("dual.csv")) > 2:
        !gsutil cp dual.csv {samplesheet_root}/{bcl}/dual.csv

def run(bcl, sheet):
    """Point the ``bcl2fastq`` method configuration at one samplesheet and submit it.

    Steps: fetch the current configuration, overwrite its three
    ``bcl2fastq.*`` inputs, push it back, validate it, then create a
    submission. Every HTTP response is checked with ``raise_for_status()``
    so a failed call stops the function instead of being reported as
    "Submitted".

    Args:
        bcl: run folder name, appended to ``bcl_root`` and ``samplesheet_root``.
        sheet: samplesheet file name under ``{samplesheet_root}/{bcl}/``
            (the notebook uses ``"single.csv"`` and ``"dual.csv"``).

    Raises:
        requests.HTTPError: if any FireCloud call returns a 4xx/5xx status
            (assumes ``fapi`` returns ``requests.Response`` objects).
        AssertionError: if validation reports extra, invalid or missing
            inputs, or invalid outputs.
    """
    # (workspace namespace, workspace, config namespace, config name)
    config = ("testmybroad", "Macosko-Pipelines", "macosko-pipelines", "bcl2fastq")

    # Update the configuration
    response = fapi.get_workspace_config(*config)
    response.raise_for_status()
    body = response.json()
    # Values carry their own double quotes inside the Python string
    # (presumably so the service reads them as string literals — confirm).
    body["inputs"]["bcl2fastq.technique"] = '"bcl2fastq"'
    body["inputs"]["bcl2fastq.bcl"] = f'"{bcl_root}/{bcl}"'
    body["inputs"]["bcl2fastq.samplesheet"] = f'"{samplesheet_root}/{bcl}/{sheet}"'
    fapi.update_workspace_config(*config, body).raise_for_status()

    # Validate the configuration
    response = fapi.validate_config(*config)
    response.raise_for_status()
    res = response.json()
    assert res["extraInputs"] == [], f"ERROR: extra input: \n{res['extraInputs']}"
    assert res["invalidInputs"] == {}, f"ERROR: invalid input: \n{res['invalidInputs']}"
    assert res["invalidOutputs"] == {}, f"ERROR: invalid output: \n{res['invalidOutputs']}"
    assert res["missingInputs"] == [], f"ERROR: missing input: \n{res['missingInputs']}"

    # Submit the job; only report success once the service has accepted it
    fapi.create_submission(*config).raise_for_status()

    print(f"Submitted {sheet} for {bcl}")

In [6]:
# Run definition: rebinds the global `bcl` / `indexes` used by the cells below.
bcl = "240609_SL-EXC_0308_A22KHFYLT3"
# Lanes 1-4 share one index set, lane 5 has its own, lanes 6-8 are empty.
indexes = {
    **{lane: ["D702", "D703", "D706", "D707", "SI-TT-C6"] for lane in range(1, 5)},
    5: ["D702", "D707", "SI-TT-A12"],
    **{lane: [] for lane in range(6, 9)},
}

In [10]:
# Run definition: rebinds the global `bcl` / `indexes` used by the cells below.
bcl = "240615_SL-EXG_0144_A22KH5WLT3"
# Lanes 1-6 share one index set; lanes 7-8 are empty.
indexes = {
    **{
        lane: [
            "D703", "D704", "D705", "D706",
            "SI-TT-B4", "SI-TT-B5", "SI-TT-B6",
            "SI-TT-C3", "SI-TT-C4", "SI-TT-C5", "SI-TT-C7",
        ]
        for lane in range(1, 7)
    },
    7: [],
    8: [],
}

In [6]:
# Run definition: rebinds the global `bcl` / `indexes` used by the cells below.
bcl = "240702_SL-EXE_0307_A22K7KKLT3"
# All eight lanes carry the same eight indexes.
indexes = {
    lane: [
        "SI-TT-D5", "SI-TT-D6", "SI-TT-C9", "SI-TT-C10",
        "SI-TT-A2", "SI-TT-B2", "SI-TT-C2", "SI-TT-D2",
    ]
    for lane in range(1, 9)
}

In [7]:
# Create the samplesheets: writes single.csv and dual.csv into the working
# directory from whichever `indexes` dict was defined most recently.
dict2csv(indexes)
# Uncomment to inspect the generated sheets or to copy them to the bucket:
# !cat single.csv
# !cat dual.csv
# upload(bcl)

In [8]:
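# Submit jobs: uncomment a line only for a sheet that upload() actually copied
# (upload() skips a sheet that contains no sample rows).
#run(bcl, "single.csv")
#run(bcl, "dual.csv")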

In [9]:
# Check the FASTQ sizes
print(bcl)
sizes = !gsutil du -h {fastqs_root}/{bcl}
sizes = [size.split() for size in sizes]
sizes = [(size[0]+" "+size[1], os.path.basename(size[2])) for size in sizes if size[2][-9:] == ".fastq.gz"]
for lane in sorted(indexes.keys()):
    for index in indexes[lane]:
        res = [size[0] for size in sizes if f"{index}_S" in size[1] and f"_L00{lane}_" in size[1]]
        warning = "WARNING!!" if any(" B" in s or " KiB" in s for s in res) or len(res) == 0 else ""
        print(f"{lane} {index} {res} {warning}")

In [11]:
# List the bcl2fastq submissions that are still active, i.e. whose status is
# neither "Done" nor "Aborted". A submission counts as bcl2fastq when the part
# of its method configuration name before the first "_" is "bcl2fastq".
subs = [
    sub
    for sub in fapi.list_submissions("testmybroad", "Macosko-Pipelines").json()
    if sub["status"] not in ["Done", "Aborted"]
    and sub["methodConfigurationName"].partition("_")[0] == "bcl2fastq"
]
print(subs)

In [None]:
# Abort bcl2fastq submissions
# Acts on whatever `subs` currently holds, so run the listing cell first.
ids = [entry["submissionId"] for entry in subs]
# Left as the cell's final expression so the notebook shows one response per abort call.
[
    fapi.abort_submission("testmybroad", "Macosko-Pipelines", submission_id)
    for submission_id in ids
]