In [1]:
# Python Script 2: Fractionation
# 1) Register 288 new samples
# 2) Register 3 Fraction Plate maps

import pandas as pd
import requests
import json
from datetime import date
import numpy as np
import os


# hardcoded variables

CELLULAR_FRACTION = ['Nucleoplasm', 'Chromatin', 'Insoluble'] # iterate through each to get the full 3x fractionation
TX_TIME = 4
TX_TIME_UNIT = "hours"
VOL = 125
HARVEST_PROTOCOL_ID = "PR003-V1"
FRX_PROTOCOL_ID = "PR004-V1"
PREP_BY = "Bodhi Hueffmeier"

PLATE_BARCODE = ""
PLATE_NAME = ""
WELL_POSITION = ""

In [2]:
# ASSUME TREATMENT PLATE HAS ALREADY BEEN GENERATED PER TOY EXAMPLE BELOW
meta_dir = 'G:/My Drive/Lindsay Pino/proj/2023_scispot_utils/data' # note: link to GDrive location? S3?
treatment_plate_file = os.path.join(meta_dir, "1K THP-1 Screen - Metadata MVP test case - 01-Cell Culture.csv")
treatment_plate = pd.read_csv(treatment_plate_file)
treatment_plate

Unnamed: 0,Registry ID,Cell Type,Name,Culture State,Stock Type,Mycoplasma Test,Passage,Supplier,Supplier Batch ID,Received Date,...,Freezing Protocol ID,Culture Protocol ID,Treatment Protocol ID,Prepared By,Preparation Date,Record Creator,Storage Location,Plate Barcode,Plate Name,Well Position
0,CUL1099,CEL1,DMSO Rep 1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A3
1,CUL1100,CEL1,dBET6 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A4
2,CUL1101,CEL1,TAL446 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A5
3,CUL1102,CEL1,TAL431 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A6
4,CUL1103,CEL1,TAL485 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A7
5,CUL1104,CEL1,TAL412 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A8
6,CUL1105,CEL1,TAL570 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A9
7,CUL1106,CEL1,TAL574 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A10
8,CUL1107,CEL1,TAL582 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A11
9,CUL1108,CEL1,TAL428 Rep1,Active,,,,,,,...,,,PR003-V1,Bodhi Hueffmeier,8/21/2023,,,PL0001,Set 1 Rep 1 Treated cells,A12


In [3]:
# 1) Register 288 new samples
# i.e. 3x 96 samples

def cell_fraction_rows(name, parent_sample, cell_frx):
    name = ""
    
    rows = [
    "AUTOMATIC", # Registry ID - don't do anything with   Registry ID,
    name, # Name (temporary field)                   Name,
    parent_sample, # Parent Sample                   Parent Sample, 
    cell_frx, # Cellular Fraction                    Cellular Fraction,
    TX_TIME, # Post-Treatment Time Point             Post-Treatment Time Point,
    TX_TIME_UNIT, # Time Point Unit                  Time Point Unit
    VOL, # Volume (uL)                               Volume (uL),
    "", # Protein Concentration (ug/uL)              Protein Concentration (ug/uL),
    HARVEST_PROTOCOL_ID, # Harvest Protocol ID       Harvest Protocol ID,
    FRX_PROTOCOL_ID, # Fractionation Protocol ID     Fractionation Protocol ID,
    PREP_BY, # Prepared By                           Prepared By,
    str(date.today()), # Preparation Date            Preparation Date,
    "", # Record Creator                             Record Creator,
    "", # Storage Location                           Storage Location
    PLATE_BARCODE, # Plate Barcode                   Plate Barcode
    PLATE_NAME, # Plate Name
    WELL_POSITION # Well Position
    ]

    return rows

columns = ["Registry ID",
           "Name",
           "Parent Sample",
           "Cellular Fraction",
           "Post-Treatment Time Point",
           "Time Point Unit",
           "Volume (uL)",
           "Protein concentration (ug/ul)",
           "Harvest Protocol ID",
           "Fractionation Protocol ID",
           "Prepared By",
           "Preparation Date",
           "Record Creator",
           "Storage Location",
           "Plate Barcode",
           "Plate Name",
           "Well Position"]

df_out = pd.DataFrame(columns=columns)

for cell_frx in CELLULAR_FRACTION:
    new_row = cell_fraction_rows("lol", "NUC1", cell_frx) # Generate a row for the 2x cryo nuclei
    df_out = df_out.append(pd.DataFrame([new_row], columns=columns)) # add one row for the first cryo nuclei...
    df_out = df_out.append(pd.DataFrame([new_row], columns=columns)) # ... and a second row for the other cryo nuclei
    for parent in treatment_plate['Registry ID']: # now loop through all the "real" samples
        new_row = cell_fraction_rows("lol", parent, cell_frx)
        df_out = df_out.append(pd.DataFrame([new_row], columns=columns))



In [4]:
df_out

Unnamed: 0,Registry ID,Name,Parent Sample,Cellular Fraction,Post-Treatment Time Point,Time Point Unit,Volume (uL),Protein concentration (ug/ul),Harvest Protocol ID,Fractionation Protocol ID,Prepared By,Preparation Date,Record Creator,Storage Location,Plate Barcode,Plate Name,Well Position
0,AUTOMATIC,,NUC1,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,NUC1,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1099,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1100,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1101,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1102,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1103,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1104,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1105,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,
0,AUTOMATIC,,CUL1106,Nucleoplasm,4,hours,125,,PR003-V1,PR004-V1,Bodhi Hueffmeier,2023-08-10,,,,,


* how should we handle the "extra" plate-location information? 