In [1]:
# END TO END GENERATION OF MARMOT METADATA INFORMATION
# lkp 2023/08/17
#
# Purpose: build a script that will allow a user (Kyle)
# to programmatically generate csv files for upload into
# Scispot LabSheets via bulk import


import pandas as pd
from datetime import date
import numpy as np
import os

In [2]:
## TODO add universal variables here (if there are any)
# future work note: link to GDrive location? S3?

# ---------------------------------------------------------------------------
# File locations
# ---------------------------------------------------------------------------
# meta_dir is joined with treatment_file to locate the treatment layout CSV;
# output_dir receives every metadata CSV written by the cells below.
meta_dir = "G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data"
#treatment_file = "./Treatment_Layout_for_testing.csv"  # old layout
treatment_file = "./2023-08-29_SET1_REP1_treatment_layout.csv"  # new layout
output_dir = "G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data/metadata_output/"

# ---------------------------------------------------------------------------
# Labsheet column definitions
# The order of each list is the column order of the CSV that gets written,
# so the row-builder functions must emit their values in the same order.
# ---------------------------------------------------------------------------

# Cell culture Labsheet
culture_columns = [
    "Registry ID",
    "Cell Type",
    "Name",
    "Culture State",
    "Stock Type",
    "Mycoplasma Test",
    "Passage",
    "Supplier",
    "Supplier Batch ID",
    "Received Date",
    "Number of Cells Seeded",
    "Volume(ml)",
    "Treatment Type",
    "Treatment Compound Stock ID",
    "Treatment Dose",
    "Treatment Dose Unit",
    "Freezing Protocol ID",
    "Culture Protocol ID",
    "Treatment Protocol ID",
    "Prepared By",
    "Preparation Date",
    "Record Creator",
    "Storage Location",
    # "Parent Culture" and "Child Cultures" are deliberately left out for now
    "Plate Barcode",
    "Plate Name",
    "Well Position",
]

# Cellular fractionation Labsheet
frx_columns = [
    "Registry ID",
    "Name",
    "Parent Sample",
    "Cellular Fraction",
    "Post-Treatment Time Point",
    "Time Point Unit",
    "Volume (uL)",
    "Protein concentration (ug/ul)",
    "Harvest Protocol ID",
    "Fractionation Protocol ID",
    "Prepared By",
    "Preparation Date",
    "Record Creator",
    "Storage Location",
    "Plate Barcode",
    "Plate Name",
    "Well Position",
]

# Peptide digest Labsheet (also used for the pooled-digest plate)
digest_columns = [
    "Registry ID",
    "Name",
    "Parent Sample",
    "Volume (uL)",
    "Protein Input (ug)",
    "Spike-In Standard",
    "Spike-In Amount",
    "Spike-In Lot ID",
    "Digest Protocol ID",
    "Prepared By",
    "Preparation Date",
    "Record Creator",
    "Storage Location",
    "Plate Barcode",
    "Plate Name",
    "Well Position",
]

# Mass spectrometry run Labsheet
msrun_columns = [
    "Registry ID",
    "Run Name",
    "Run Date",
    "Prepared By",
    "Acquisition Type",
    "Gradient Length (Minutes)",
    "Instrument",
    "Instrument LC Type",
    "LC Deck Position",
    "Column ID",
    "Peptide Digest IDs",
    "Sample Loaded Mass",
    "Sample Loaded Mass Unit",
    "Injection Volume (uL)",
    "Injection Mass",
    "Injection Mass Unit",
    "Evotip Lot Number",
    "Sample Loading Protocol ID",
    "MS Acquisition Protocol ID",
    "Record Creator",
    "Evotip Rack Barcode",
    "Evotip Rack Name",
    "Tip Position",
]

# Instrument queue file (not a Labsheet)
lcms_queue_columns = [
    "Vial",  # S# is evosep rack position. There are positions S1 through S6
    "Status",
    "Sample ID",  # Yes - those filenames are the ones that will be uploaded to AWS
    "Volume [ul]",
    "Data Path",
    "Method Set",
    "Separation Method",
    "Injection Method",
    "MS Method",
]

In [3]:
# Python Script 1: Cell Culture
# Lindsay's version

# --- user-defined values for this culture plate ---
NUM_CELLS_SEEDED = 400000      # goes into 'Number of Cells Seeded'
VOL_CULTURE = 0.17             # goes into 'Volume(ml)'
TRX_PROTOCOL_ID = 'PR003-V1'   # goes into 'Treatment Protocol ID'
PREP_BY = "Bodhi Hueffmeier"
PLATE_BARCODE = "PL0002"
SET = 1
REP = 1
# TODO: "Dest_Plate_ID" in the treatment info file also has the set/rep information
# so don't need to ask for these values from the user

# Plate name is "Set <SET> Rep <REP> <suffix>"; the suffix is kept in one place
PLATE_NAME_SUFFIX = "Treated cells"
PLATE_NAME = "Set {} Rep {} {}".format(SET, REP, PLATE_NAME_SUFFIX)
OUTPUT_SUFFIX = "culture"      # used in the output CSV file name

def culture_rows(increment, parent_compound, trx_compound_stock_id, dose, trx_dose_unit, trx_type, well_position):
    """Build the values for one row of the cell culture Labsheet.

    Parameters
    ----------
    increment : counter appended to "CUL" to form the placeholder Registry ID
    parent_compound : compound identifier; used to build the Name field
    trx_compound_stock_id : value for 'Treatment Compound Stock ID'
    dose, trx_dose_unit : values for 'Treatment Dose' / 'Treatment Dose Unit'
    trx_type : value for 'Treatment Type'
    well_position : value for 'Well Position'

    Returns
    -------
    list
        26 values, ordered to line up with ``culture_columns``. Fields that
        are not known at this stage are empty strings. The remaining fields
        come from the module-level constants (NUM_CELLS_SEEDED, VOL_CULTURE,
        TRX_PROTOCOL_ID, PREP_BY, PLATE_BARCODE, PLATE_NAME, REP) and today's date.
    """
    blank = ''
    registry_id = "CUL" + str(increment)                      # placeholder Registry ID
    sample_name = "{} Rep {}".format(parent_compound, REP)    # temporary Name field
    prepared_on = str(date.today())

    return [
        registry_id,            # Registry ID
        blank,                  # Cell Type
        sample_name,            # Name
        'Active',               # Culture State
        blank,                  # Stock Type
        blank,                  # Mycoplasma Test
        blank,                  # Passage
        blank,                  # Supplier
        blank,                  # Supplier Batch ID
        blank,                  # Received Date
        NUM_CELLS_SEEDED,       # Number of Cells Seeded
        VOL_CULTURE,            # Volume(ml)
        trx_type,               # Treatment Type
        trx_compound_stock_id,  # Treatment Compound Stock ID
        dose,                   # Treatment Dose
        trx_dose_unit,          # Treatment Dose Unit
        blank,                  # Freezing Protocol ID
        blank,                  # Culture Protocol ID
        TRX_PROTOCOL_ID,        # Treatment Protocol ID
        PREP_BY,                # Prepared By
        prepared_on,            # Preparation Date
        blank,                  # Record Creator
        blank,                  # Storage Location
        # (Parent Culture / Child Cultures are not part of culture_columns)
        PLATE_BARCODE,          # Plate Barcode
        PLATE_NAME,             # Plate Name
        well_position,          # Well Position
    ]

# Load the treatment layout (one row per destination well)
treatment_info = pd.read_csv(os.path.join(meta_dir, treatment_file))

# Collect the rows in a plain list and build the DataFrame once at the end.
# Concatenating a one-row frame per iteration is quadratic, starts from an
# empty frame, and leaves every row with index label 0.
culture_records = []

i = 1  # counter for the placeholder Registry IDs (CUL1, CUL2, ...)
for index, row in treatment_info.iterrows():

    treatment_type = row['Experiment_Condition']

    # fractionation controls are not registered as cultures
    if treatment_type == "Fractionation control":
        continue

    well_pos = str(row['Dest_Well'])

    if treatment_type == "DMSO":
        # vehicle-only wells: no compound stock, dose, or dose unit
        compound = "DMSO"
        compound_stock_id = ""
        dose = ""
        dose_unit = ""
    else:
        # scrape user-provided info
        compound = row['Compound_ID']
        #compound_stock_id = str(row['Compound Aliquot ID'])
        compound_stock_id = ''
        dose = row['Dest_Final_Conc_uM']
        #dose_unit = row['Dose Unit']
        dose_unit = 'uM'

    # pass scraped variables to create new Labsheet row
    culture_records.append(
        culture_rows(i, compound, compound_stock_id, dose, dose_unit, treatment_type, well_pos)
    )

    # increment counter for next compound
    i += 1

culture_df_out = pd.DataFrame(culture_records, columns=culture_columns)

# write the bulk-import CSV; the file name is reused by later cells
culture_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + OUTPUT_SUFFIX + ".csv"
culture_df_out.to_csv(os.path.join(output_dir, culture_out_filename), index=False)

culture_df_out

Unnamed: 0,Registry ID,Cell Type,Name,Culture State,Stock Type,Mycoplasma Test,Passage,Supplier,Supplier Batch ID,Received Date,...,Freezing Protocol ID,Culture Protocol ID,Treatment Protocol ID,Prepared By,Preparation Date,Record Creator,Storage Location,Plate Barcode,Plate Name,Well Position
0,CUL1,,TAL501 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,D6
0,CUL2,,TAL502 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,E3
0,CUL3,,TAL503 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,G12
0,CUL4,,TAL504 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,E5
0,CUL5,,TAL505 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,A6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,CUL88,,dBET6 Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,B6
0,CUL89,,DMSO Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,C10
0,CUL90,,DMSO Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,B4
0,CUL91,,DMSO Rep 1,Active,,,,,,,...,,,PR003-V1,Prepared By,2023-08-18,Record Creator,Storage Location,PL0002,Set 1 Rep 1 Treated cells,A8


In [4]:
# Python Script 2: Fractionation
# Register 288 new samples and their plate locations

# --- hardcoded variables ---
# every fraction listed here gets its own pass over the plate and its own CSV
CELLULAR_FRACTION = ['Nucleoplasm', 'Chromatin', 'Insoluble']
TX_TIME = 4                       # 'Post-Treatment Time Point'
TX_TIME_UNIT = "Hours"            # 'Time Point Unit'
VOL_FRX = 125                     # 'Volume (uL)'
HARVEST_PROTOCOL_ID = "PR003-V1"
FRX_PROTOCOL_ID = "PR004-V1"
PREP_BY = "Bodhi Hueffmeier"
PLATE_BARCODE = "PL0002"
#PLATE_BARCODE = "PL" + str(int(treatment_plate['Plate Barcode'].drop_duplicates()[0][2:]) + 1).rjust(4, '0')
PLATE_NAME = "Set 1 Rep 1"        # the fraction name is appended per row
WELL_POSITION = ""                # see below, inherited from previous plate
OUTPUT_SUFFIX = "fractionation"   # used in the output CSV file names
#culture_out_filename = '2023-09-01-PL0002-culture.csv'

# --- read in the previous metadata file ---
# culture_out_filename is set by the cell culture cell (or by the override above)
treatment_plate_file = os.path.join(output_dir, culture_out_filename)
treatment_plate = pd.read_csv(treatment_plate_file)


def cell_fraction_rows(increment, parent_sample, cell_frx):
    """Build the values for one row of the fractionation Labsheet.

    Parameters
    ----------
    increment : counter appended to "FRA" to form the placeholder Registry ID
    parent_sample : Registry ID of the parent culture, or the literal "NUC1"
    cell_frx : name of the cellular fraction; also appended to the plate name

    Returns
    -------
    list
        17 values, ordered to line up with ``frx_columns``.

    Notes
    -----
    For any parent other than "NUC1" the well position is taken from the
    first row of the module-level ``treatment_plate`` whose 'Registry ID'
    equals ``parent_sample`` (IndexError if there is no such row). For "NUC1"
    the well position is returned empty and is filled in by the caller.
    """
    if parent_sample == "NUC1":
        well_position = ""
    else:
        is_parent = treatment_plate['Registry ID'] == parent_sample
        well_position = str(treatment_plate.loc[is_parent, 'Well Position'].iloc[0])

    blank = ""
    registry_id = "FRA" + str(increment)                  # placeholder Registry ID
    plate_name = "{} {}".format(PLATE_NAME, cell_frx)

    return [
        registry_id,          # Registry ID
        blank,                # Name (temporary field, left empty)
        parent_sample,        # Parent Sample
        cell_frx,             # Cellular Fraction
        TX_TIME,              # Post-Treatment Time Point
        TX_TIME_UNIT,         # Time Point Unit
        VOL_FRX,              # Volume (uL)
        blank,                # Protein concentration (ug/ul)
        HARVEST_PROTOCOL_ID,  # Harvest Protocol ID
        FRX_PROTOCOL_ID,      # Fractionation Protocol ID
        PREP_BY,              # Prepared By
        str(date.today()),    # Preparation Date
        blank,                # Record Creator
        blank,                # Storage Location
        PLATE_BARCODE,        # Plate Barcode
        plate_name,           # Plate Name
        well_position,        # Well Position
    ]

# Write one fractionation CSV per cellular fraction. The placeholder Registry
# ID counter keeps running across fractions so every FRA id is unique.
i = 1
for cell_frx in CELLULAR_FRACTION:

    # Rows are collected in a list and turned into a DataFrame once per
    # fraction, instead of concatenating a one-row frame per sample.
    frx_rows = []

    #
    # TODO!!!!
    # MAP PLATE BARCODES TO INDEX POSITION OF FRACTIONS
    #
    #

    # the first two wells hold the cryo nuclei (NUC1) controls
    for control_well in ("A1", "A2"):
        new_row = cell_fraction_rows(i, "NUC1", cell_frx)
        new_row[-1] = control_well  # NUC1 has no parent well, so set it here
        frx_rows.append(new_row)
        i += 1

    # now loop through all the "real" samples
    for parent in treatment_plate['Registry ID']:
        frx_rows.append(cell_fraction_rows(i, parent, cell_frx))
        i += 1

    # and finally add the last two wells of NUC1 controls
    for control_well in ("H11", "H12"):
        new_row = cell_fraction_rows(i, "NUC1", cell_frx)
        new_row[-1] = control_well
        frx_rows.append(new_row)
        i += 1

    frx_df_out = pd.DataFrame(frx_rows, columns=frx_columns)

    frx_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + OUTPUT_SUFFIX + "-" + str(cell_frx) + ".csv"
    frx_df_out.to_csv(os.path.join(output_dir, frx_out_filename), index=False)
    del frx_df_out

# set up for the next chunk (digestion)
# NOTE: after the loop frx_out_filename names the LAST fraction written;
# chrfrx_out_filename is the name to use for the Chromatin plate.
chrfrx_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + OUTPUT_SUFFIX + "-Chromatin.csv"

In [5]:
# Python Script 3: Digestion
# 1) Register digested peptides for chromatin (and nucleoplasm if Rep = 1)

# --- hardcoded variables ---
VOL = 170                         # 'Volume (uL)' of each digest
VOL_POOL = 50                     # 'Volume (uL)' of each pooled digest
SPIKE_STD = "None"
SPIKE_AMOUNT = ""
SPIKE_LOT = ""
DIGEST_PROTOCOL_ID = "PR005-V1"
PREP_BY = "Julia Robbins"
# SET and REP are strings here because they are concatenated into names
SET = "1"
REP = "1"
PLATE_BARCODE = "PL0003"
PLATE_NAME = "Set {} Rep {} Digested peptides".format(SET, REP)
WELL_POSITION = ""
OUTPUT_SUFFIX = "digestion"       # used in the output CSV file name

def peptide_digest_rows(increment, parent_sample, well_position, cell_frx):
    """Build the values for one row of the peptide digest Labsheet.

    Parameters
    ----------
    increment : counter appended to "PDG" to form the placeholder Registry ID
    parent_sample : Registry ID of the fraction that was digested
    well_position : value for 'Well Position'
    cell_frx : cellular fraction name (str); appended to the plate name in parentheses

    Returns
    -------
    list
        16 values, ordered to line up with ``digest_columns``.

    Notes
    -----
    The spike-in amount and lot are taken from the module-level SPIKE_AMOUNT
    and SPIKE_LOT constants, alongside SPIKE_STD. They used to be hard-coded
    to "", which silently discarded whatever the user configured.
    'Plate Barcode' is left empty.
    """
    name = "lol"  # placeholder for the temporary Name field

    name4plate = PLATE_NAME + " (" + cell_frx + ")"

    rows = [
        "PDG" + str(increment),  # Registry ID (placeholder)
        name,                    # Name (temporary field)
        parent_sample,           # Parent Sample
        VOL,                     # Volume (uL)
        "",                      # Protein Input (ug)
        SPIKE_STD,               # Spike-In Standard
        SPIKE_AMOUNT,            # Spike-In Amount
        SPIKE_LOT,               # Spike-In Lot ID
        DIGEST_PROTOCOL_ID,      # Digest Protocol ID
        PREP_BY,                 # Prepared By
        str(date.today()),       # Preparation Date
        "",                      # Record Creator
        "",                      # Storage Location
        "",                      # Plate Barcode
        name4plate,              # Plate Name
        well_position,           # Well Position
    ]

    return rows

def peptide_pool_rows(increment, parent_sample, well_position):
    """Build the values for one row of the pooled-digest plate.

    Parameters
    ----------
    increment : counter appended to "PDG" to form the placeholder Registry ID
    parent_sample : value for 'Parent Sample' (the caller passes a
        comma-separated list of the pooled fractions' Registry IDs)
    well_position : value for 'Well Position'

    Returns
    -------
    list
        16 values, ordered to line up with ``digest_columns``. The protein
        input, spike-in, digest protocol, record creator, storage location
        and plate barcode fields are all left empty.
    """
    blank = ""
    registry_id = "PDG" + str(increment)               # placeholder Registry ID
    placeholder_name = "lol"                           # temporary Name field
    plate_label = PLATE_NAME + " (nuclear, dilute)"
    prepared_on = str(date.today())

    return [
        registry_id,       # Registry ID
        placeholder_name,  # Name
        parent_sample,     # Parent Sample
        VOL_POOL,          # Volume (uL)
        blank,             # Protein Input (ug)
        blank,             # Spike-In Standard
        blank,             # Spike-In Amount
        blank,             # Spike-In Lot ID
        blank,             # Digest Protocol ID
        PREP_BY,           # Prepared By
        prepared_on,       # Preparation Date
        blank,             # Record Creator
        blank,             # Storage Location
        blank,             # Plate Barcode
        plate_label,       # Plate Name
        well_position,     # Well Position
    ]

# read in the previous metadata file (the Chromatin fractionation plate)
fractionation_plate = pd.read_csv(os.path.join(output_dir, chrfrx_out_filename))


def _pooled_digest_row(increment, fractions):
    """Build one pooled-digest row from the fractionation rows in `fractions`.

    The pool's well is the first distinct 'Well Position' among `fractions`
    and its parent is the comma-separated list of their 'Registry ID' values.
    Raises IndexError if `fractions` is empty.
    """
    well_pos = str(fractions['Well Position'].drop_duplicates().iloc[0])
    related_fra = ', '.join(list(fractions['Registry ID']))
    return peptide_pool_rows(increment, related_fra, well_pos)


if REP != "1":
    # for every replicate other than Rep 1, only chromatin rows are digested
    fractionation_plate = fractionation_plate[(fractionation_plate['Cellular Fraction'] == 'Chromatin')]

# ---------------------------------------------------------------------------
# Digest plate: one digest per fraction on the plate.
# Rows are collected in a list and the DataFrame is built once. Each row of
# the plate is used directly, which matches a lookup by Registry ID as long
# as the Registry IDs in the file are unique.
# ---------------------------------------------------------------------------
i = 1  # counter for placeholder Registry IDs; continues into the pooled plate
digest_records = []
for parent, well_pos, cell_fraction in zip(
    fractionation_plate['Registry ID'],
    fractionation_plate['Well Position'],
    fractionation_plate['Cellular Fraction'],
):
    digest_records.append(peptide_digest_rows(i, parent, str(well_pos), str(cell_fraction)))
    i += 1

digest_df_out = pd.DataFrame(digest_records, columns=digest_columns)
digest_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + OUTPUT_SUFFIX + ".csv"
digest_df_out.to_csv(os.path.join(output_dir, digest_out_filename), index=False)
del digest_df_out

if REP == "1":
    # for Rep 1, we'll also be pooling nuc+chrom for DDA
    # so while we're making these digests, also make the bonus pooled plate

    # Derive the Nucleoplasm file name from the Chromatin one. Do NOT use
    # frx_out_filename here: after the fractionation loop it names the last
    # fraction written (Insoluble), so replacing "Chromatin" in it is a no-op
    # and the Insoluble plate would be pooled instead.
    nuc_frx_filename = chrfrx_out_filename.replace("Chromatin", "Nucleoplasm")
    nuc_frx_plate = pd.read_csv(os.path.join(output_dir, nuc_frx_filename))
    fractionation_plate = pd.concat([fractionation_plate, nuc_frx_plate], ignore_index=True)

    # Decide which fractions are pooled together, in output order:
    #   NUC1 controls in A1 and A2 (matched by well, since they share a parent),
    #   then one pool per non-NUC1 parent culture,
    #   then NUC1 controls in H11 and H12.
    pool_groups = []
    for control_well in ("A1", "A2"):
        pool_groups.append(fractionation_plate[fractionation_plate['Well Position'] == control_well])

    for grandparent in fractionation_plate['Parent Sample'].drop_duplicates():
        if grandparent != "NUC1":
            pool_groups.append(fractionation_plate[fractionation_plate['Parent Sample'] == grandparent])

    for control_well in ("H11", "H12"):
        pool_groups.append(fractionation_plate[fractionation_plate['Well Position'] == control_well])

    pool_records = []
    for fractions in pool_groups:
        pool_records.append(_pooled_digest_row(i, fractions))
        i += 1

    # save the pooled digest metadata CSV
    df_out_pool = pd.DataFrame(pool_records, columns=digest_columns)
    out_pool_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + "pooled-digest" + ".csv"
    df_out_pool.to_csv(os.path.join(output_dir, out_pool_filename), index=False)
    del df_out_pool
                                


In [12]:
# Python Script 5: MS runs
# Register mass spectrometry runs for provided peptides

# hardcoded variables

PREP_BY = "Andrea Gutierrez"
ACQUISITION_TYPE = "DDA" # DDA OR DIA
GRAD_LENGTH = "44"                 # 'Gradient Length (Minutes)'
INSTRUMENT = "timsTOF SCP"
INSTRUMENT_LC = "Evosep"
LC_DECK_POSITION = ""
COLUMN_ID = "MSC1"
INJECT_VOL = "26"                  # 'Injection Volume (uL)'
EVOTIP_LOT_NUM = "AB1134"
SAMPLE_LOAD_PROTOCOL = "PR008-V1"
# SET and REP are strings because they are concatenated into run names
SET = "1"
REP = "1"
PLATE_BARCODE = "PL0004"
# A space separates the replicate from the acquisition type. Without it the
# name came out as "Set 1 Rep 1DDA Evotips".
PLATE_NAME = "Set " + SET + " Rep " + REP + " " + ACQUISITION_TYPE + " Evotips"
WELL_POSITION = ""
OUTPUT_SUFFIX = "msruns"           # used in the output CSV file name
EVOSEP_RACK = 'S3'                 # prefix of the 'Vial' field in the LC-MS queue

def ms_run_rows(increment, sample, well_position):
    """Build the values for one row of the MS run Labsheet.

    Parameters
    ----------
    increment : counter appended to "MSR" to form the placeholder Registry ID
    sample : Registry ID of the peptide digest being run
    well_position : tip position (str); also embedded in the run name

    Returns
    -------
    list
        23 values, ordered to line up with ``msrun_columns``.

    Notes
    -----
    The run name is "SET<SET>REP<REP><well>_<treatment>". The treatment label
    comes from the culture name found in the module-level ``temp_df`` (first
    row whose 'Registry ID_x' equals ``sample``, column 'Name_y'):
    a missing name gives "NUC1", a name containing "DMSO" gives "DMSO", a
    name containing "TAL0000087" gives "dBET6", and otherwise the text before
    the first space is used. Raises IndexError if ``sample`` is not in ``temp_df``.
    """
    # look up the culture name for this digest in the merged dataframe
    is_sample = temp_df['Registry ID_x'] == sample
    culture_name = temp_df.loc[is_sample, 'Name_y'].iloc[0]

    if pd.isnull(culture_name):
        trx = "NUC1"
    else:
        # checked in order; first keyword found in the name wins
        for keyword, label in (("DMSO", "DMSO"), ("TAL0000087", "dBET6")):
            if keyword in culture_name:
                trx = label
                break
        else:
            trx = culture_name.split(" ")[0]

    run_name = "".join(["SET", SET, "REP", REP, well_position, "_", trx])
    blank = ""

    return [
        "MSR" + str(increment),  # Registry ID
        run_name,                # Run Name
        str(date.today()),       # Run Date
        PREP_BY,                 # Prepared By
        ACQUISITION_TYPE,        # Acquisition Type
        GRAD_LENGTH,             # Gradient Length (Minutes)
        INSTRUMENT,              # Instrument
        INSTRUMENT_LC,           # Instrument LC Type
        LC_DECK_POSITION,        # LC Deck Position
        COLUMN_ID,               # Column ID
        sample,                  # Peptide Digest IDs
        blank,                   # Sample Loaded Mass
        blank,                   # Sample Loaded Mass Unit
        INJECT_VOL,              # Injection Volume (uL)
        blank,                   # Injection Mass
        blank,                   # Injection Mass Unit
        EVOTIP_LOT_NUM,          # Evotip Lot Number
        SAMPLE_LOAD_PROTOCOL,    # Sample Loading Protocol ID
        blank,                   # MS Acquisition Protocol ID
        blank,                   # Record Creator
        blank,                   # Evotip Rack Barcode
        blank,                   # Evotip Rack Name
        well_position,           # Tip Position
    ]

# ASSUME DIGESTION PLATE HAS ALREADY BEEN GENERATED
# (digest_out_filename is set by the digestion cell)
digestion_plate_file = os.path.join(output_dir, digest_out_filename)
digestion_plate = pd.read_csv(digestion_plate_file)

## USE RELATED CULTURE SHEET TO NAME MASS SPEC FILES
## SO THAT THE FILE NAMES INCLUDE THE TREATMENT METADATA
og_culture_data = pd.read_csv(os.path.join(output_dir, culture_out_filename))

# Make a temp dataframe by merging the digestion plate with the og culture
# metadata on well position. Columns the two sheets share get _x (digest)
# and _y (culture) suffixes; ms_run_rows reads 'Registry ID_x' and 'Name_y'.
temp_df = pd.merge(digestion_plate, og_culture_data, on="Well Position", how="outer")

# Build every MS run row first and create the DataFrame once. Concatenating
# a one-row frame per sample is quadratic and leaves every row with index 0.
# Each digest row is used directly, which matches a lookup by Registry ID as
# long as the Registry IDs in the digest file are unique.
ms_records = []
i = 1  # counter for placeholder Registry IDs (MSR1, MSR2, ...)
for parent, well_pos in zip(digestion_plate['Registry ID'], digestion_plate['Well Position']):
    ms_records.append(ms_run_rows(i, parent, str(well_pos)))
    i += 1

ms_df_out = pd.DataFrame(ms_records, columns=msrun_columns)

# ms_df_out is kept (not deleted) because the LC-MS queue cell reads it
msrun_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + OUTPUT_SUFFIX + ".csv"
ms_df_out.to_csv(os.path.join(output_dir, msrun_out_filename), index=False)

Unnamed: 0,Vial,Status,Sample ID,Volume [ul],Data Path,Method Set,Separation Method,Injection Method,MS Method
0,S3-F8,,MSR131,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-H3,,MSR151,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-H8,,MSR16,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-B8,,MSR143,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-H9,,MSR124,,,,30 Samples per Day,Standard,PLACEHOLDER
...,...,...,...,...,...,...,...,...,...
0,S3-H2,,MSR106,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-D3,,MSR146,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-E3,,MSR4,,,,30 Samples per Day,Standard,PLACEHOLDER
0,S3-A11,,MSR104,,,,30 Samples per Day,Standard,PLACEHOLDER


In [None]:
# Python Script 5b: LC-MS queue
# Create a timsTOF queue from the registered mass spec peptide samples

# hardcoded variables at top of this chunk

def lcms_queue_rows(increment, sample_id, well_position):
    """Build the values for one row of the LC-MS instrument queue.

    Parameters
    ----------
    increment : running counter supplied by the caller (not used in the row)
    sample_id : value for 'Sample ID'
    well_position : tip position; combined with EVOSEP_RACK to form 'Vial'

    Returns
    -------
    list
        9 values, ordered to line up with ``lcms_queue_columns``.

    Raises
    ------
    ValueError
        If the module-level ACQUISITION_TYPE is neither "DIA" nor "DDA".
        This used to fail later with an UnboundLocalError.
    """
    # The method paths are raw strings so the backslashes are kept literally.
    # As ordinary strings, "\t" in "\timsTOF" became a TAB character.
    if ACQUISITION_TYPE == "DIA":
        separation_method = '60 Samples per Day'
        ms_method = r"D:\Methods\Default Application Methods\OTOF\timsTOF SCP\Proteomics TIMS on\diaPASEF_TALUS_400_1000_0.7cycle.m?OtofImpacTEMControl"
    elif ACQUISITION_TYPE == "DDA":
        separation_method = '30 Samples per Day'
        ms_method = r'D:\Methods\Default Application Methods\OTOF\timsTOF SCP\Proteomics TIMS on\DDA PASEF-low_sample_amount_0.6sec_cycletime_ShortGradient_150ms_4cycles.m'
    else:
        raise ValueError(
            "ACQUISITION_TYPE must be 'DIA' or 'DDA', got {!r}".format(ACQUISITION_TYPE)
        )

    vial_id = EVOSEP_RACK + "-" + well_position

    rows = [
        vial_id, # 'Vial', # S# is evosep rack position. There are positions S1 through S6
        '', # 'Status',
        sample_id, #'Sample ID', # Yes - those filenames are the ones that will be uploaded to AWS
        '', # 'Volume [ul]',
        '', # 'Data Path',
        '', # 'Method Set',
        separation_method, # 'Separation Method',
        'Standard', # 'Injection Method',
        ms_method # 'MS Method'
    ]

    return rows


# shuffle the samples
# NOTE(review): no random_state is passed, so the run order differs on every
# execution and cannot be regenerated later - consider recording a seed.
randomized_lcms_df = ms_df_out.sample(frac = 1)

# Generate an LC-MS queue in the shuffled order. The rows are collected in a
# list and the DataFrame is built once, instead of concatenating a one-row
# frame per sample. Each shuffled row is used directly, which matches a
# lookup by Registry ID as long as the Registry IDs are unique.
queue_records = []
i = 1
for well_pos, ms_filename in zip(randomized_lcms_df['Tip Position'], randomized_lcms_df['Run Name']):
    #TODO
    # sample_id = MSR# + SETXREPXA1 + trx + acquisition
    queue_records.append(lcms_queue_rows(i, str(ms_filename), str(well_pos)))
    i += 1

lcms_queue_out = pd.DataFrame(queue_records, columns=lcms_queue_columns)

# NOTE(review): this reuses msrun_out_filename, so after this cell the name
# points at the queue CSV rather than the MS run Labsheet CSV. Kept as-is in
# case other code relies on it - confirm, then give the queue its own variable.
msrun_out_filename = str(date.today()) + "-" + PLATE_BARCODE + "-" + "lcms_queue" + ".csv"
lcms_queue_out.to_csv(os.path.join(output_dir, msrun_out_filename), index=False)

lcms_queue_out