## Cellcyte Image Processing
### Dataset ID - 0FB0444E
**Dataset name:** 20230328_co-upfront_BRAFi+P2RX7i   
**Date:** 2023-05-01  
**By:** Monica Del Valle

---

In [1]:
import sys
import os
import re
import requests
from io import StringIO
import pandas as pd
import numpy as np
from datetime import datetime
from config import config

### Data Export - RedCap API

In [3]:
EXP = '0FB0444E'

get_record = {
    'token': config['api_token'],
    'content': 'record',
    'format': 'csv',
    'type': 'flat',
    'filterLogic': f"[dataset_id]='{EXP}'"
}

record_response = requests.post(config['api_url'], data=get_record)
print('get record HTTP Status: ' + str(record_response.status_code))

exp_data = StringIO(record_response.text)
exp_df = pd.read_csv(exp_data, sep=',', dtype='str')
exp_df

get record HTTP Status: 200


Unnamed: 0,record_id,request_user,expt_owner,contact,dataset_id,dataset_name,expt_date,time_drug_added,timepoints,ch1_name,ch2_name,plate_map_file,cellcyte_experiment_image_processing_request_form_complete
0,2,Jordon Brinkley,Philip Stauffer,Slack,0FB0444E,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,2023-03-28 11:30,21,Red,Green,20230328_BRAFi-P2RX7i_co-upfront_platemap.tsv,2


In [5]:
# plate map file export
get_file = {
    'token': config['api_token'],
    'content': 'file',
    'action': 'export',
    'record': exp_df['record_id'].values[0],
    'field': 'plate_map_file',
    'returnFormat': 'csv'
}

file_response = requests.post(config['api_url'], data=get_file)
print('get file HTTP Status: ' + str(file_response.status_code))

file_data = StringIO(file_response.text)
pmap = pd.read_csv(file_data, sep='\t')
pmap

get file HTTP Status: 200


Unnamed: 0,well,cell.line,drug1,drug1.conc,drug1.units,drug2,drug2.conc,drug2.units
0,A01,,,,M,,,M
1,A02,,,,M,,,M
2,A03,,,,M,,,M
3,A04,,,,M,,,M
4,A05,,,,M,,,M
...,...,...,...,...,...,...,...,...
91,H08,,,,M,,,M
92,H09,,,,M,,,M
93,H10,,,,M,,,M
94,H11,,,,M,,,M


In [6]:
# root directory of cellcyte data
TOP_DIR = '/mnt/monica/quaranta2/Cellcyte'
EXP_DATE = exp_df['expt_date'].values[0]
EXP_NAME = exp_df['dataset_name'].values[0]

EXP_DIR = f'{TOP_DIR}/{EXP_DATE}_{EXP}'
IMG_DIR = f'{EXP_DIR}/images'
SEG_DIR = f'{EXP_DIR}/segmentation'

DRUG = exp_df['time_drug_added'].values[0]

SAVE_DIR = f'{EXP_DIR}/processed_data'

---
## Preprocessing

In [8]:
# sample image file name:
# S4_B3_P9_10X_C-R_20220930-155516_Z_-3.443379.png
# ? | well | position | ? | channel-R,G,EC | YYYYMMDD-HHMMSS | 

def fixWellName(well_name):
    # function to fix well names by ensuring 3-digit length 
    # (i.e., include preceding 0 in single-digit column numbers) 

    if isinstance(well_name, list):
        return([f'{wn[0]}0{wn[1]}' if len(wn) < 3 else wn for wn in well_name])
    elif(isinstance(well_name, str)):
        if len(well_name) < 3:
            return(f'{well_name[0]}0{well_name[1]}')
            # return well_name[0] + '0' + well_name[1]
    else:
        well_name

def parseFileName(filename):
    filename = os.path.basename(filename)
    x = filename.split(".")[0]
    x = x.split("_")
    well = x[1]
    ch = x[4] # C-CH
    ch = ch.split("-")[1]
    position = x[2][1:] # exclude P
    time = datetime.strptime(x[5], '%Y%m%d-%H%M%S')
    time = time.strftime("%Y-%m-%d %H:%M") # no s
    # time_i = x.split("_")[5]
    out = [well,ch, int(position), time]
    return(out)

### Get Image Filenames

In [10]:
IMG_DIR

'/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0444E/images'

In [12]:
os.chdir(IMG_DIR)

fn = []
files = os.listdir(IMG_DIR)

fn = [os.path.join(IMG_DIR, f) for f in files if ".png" in f]

# remove .DS_Store (hiddent Spotlight) files, if present
fn = [f for f in fn if ".DS_Store" not in f]

print(f"{len(fn)} files were found.")

if(os.path.isfile(fn[0])):
    print(f"The file {os.path.basename(fn[0])} has a complete path.")
else:
    print(f"The file {os.path.basename(fn[0])} does NOT have a complete path.")
    
fn.sort()

12096 files were found.
The file S4_G9_P1_10X_C-EC_20230329-065231_Z_-2.015429.png has a complete path.


### Create Image Data File

Extract image information from filenames

In [13]:
file_info = pd.DataFrame([parseFileName(x) for x in fn])
file_info.columns = ['well','ch', 'position', 'image_time']
file_info['file_name'] = fn
file_info

Unnamed: 0,well,ch,position,image_time,file_name
0,B2,EC,1,2023-03-28 16:33,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
1,B2,EC,1,2023-03-28 23:33,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
2,B2,EC,1,2023-03-29 06:33,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
3,B2,EC,1,2023-03-29 13:33,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
4,B2,EC,1,2023-03-29 20:33,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
...,...,...,...,...,...
12091,G9,R,4,2023-04-02 08:54,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
12092,G9,R,4,2023-04-02 15:53,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
12093,G9,R,4,2023-04-02 22:55,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...
12094,G9,R,4,2023-04-03 05:54,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...


In [14]:
# save
file_info.to_csv(f'{SAVE_DIR}/{EXP_NAME}_image_data.csv', index=False)

In [15]:
# checking for missing images
file_info.ch.value_counts()

EC    4032
R     4032
G     4032
Name: ch, dtype: int64

### Create Task Arguments

Format data for image processing script

In [16]:
# ch1
red = file_info.loc[file_info['ch']=='R',['file_name', 'position', 'well', 'image_time']]
red = red.reset_index(drop=True)
red

Unnamed: 0,file_name,position,well,image_time
0,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-28 16:33
1,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-28 23:33
2,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 06:33
3,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 13:33
4,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 20:33
...,...,...,...,...
4027,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 08:54
4028,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 15:53
4029,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 22:55
4030,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-03 05:54


In [17]:
# fix wells
wells = fixWellName(red['well'].tolist())
wells_fix = pd.Series(wells)
red = red.assign(well=wells_fix)
red

Unnamed: 0,file_name,position,well,image_time
0,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-28 16:33
1,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-28 23:33
2,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 06:33
3,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 13:33
4,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 20:33
...,...,...,...,...
4027,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 08:54
4028,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 15:53
4029,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 22:55
4030,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-03 05:54


In [18]:
# ch2
green = file_info.loc[file_info['ch']=='G',['file_name', 'position', 'well', 'image_time']]
green = green.reset_index(drop=True)
green

Unnamed: 0,file_name,position,well,image_time
0,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-28 16:33
1,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-28 23:33
2,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 06:33
3,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 13:33
4,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B2,2023-03-29 20:33
...,...,...,...,...
4027,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 08:54
4028,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 15:53
4029,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-02 22:55
4030,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G9,2023-04-03 05:55


In [19]:
# fix wells
wells = fixWellName(green['well'].tolist())
wells_fix = pd.Series(wells)
green = green.assign(well=wells_fix)
green

Unnamed: 0,file_name,position,well,image_time
0,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-28 16:33
1,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-28 23:33
2,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 06:33
3,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 13:33
4,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,1,B02,2023-03-29 20:33
...,...,...,...,...
4027,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 08:54
4028,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 15:53
4029,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-02 22:55
4030,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,4,G09,2023-04-03 05:55


In [20]:
red = red.sort_values(by=['well', 'position'], ignore_index=True)

In [21]:
green = green.sort_values(by=['well', 'position'], ignore_index=True)

In [22]:
red['well'].equals(green['well'])

True

In [23]:
red['position'].equals(green['position'])

True

**Note:** Setting plate id to 1 since timepoints for cellcyte are difficult to parse from metadata

In [24]:
# using timepoints as plate_id
taskargs = pd.DataFrame({
                        'ch2_im_path': green['file_name'],
                        'nuc_im_path': red['file_name'],
                        'overwrite': 'FALSE',
                        'plate_id': 1,
                        'regprops': 'FALSE',
                        'save_path': SEG_DIR,
                        'well': red['well']
})

taskargs

Unnamed: 0,ch2_im_path,nuc_im_path,overwrite,plate_id,regprops,save_path,well
0,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,B02
1,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,B02
2,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,B02
3,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,B02
4,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,B02
...,...,...,...,...,...,...,...
4027,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,G09
4028,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,G09
4029,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,G09
4030,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,FALSE,1,FALSE,/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0...,G09


In [25]:
taskargs.to_csv(f'{SAVE_DIR}/{EXP_NAME}_task_args.csv', index=False)

---
## Image Segmentation

Segmentation script that generates cellcounts and segmented images is run in parallel using Celery

**Commands run:**
```
conda activate improc
cd git-repos/Segmentation-other/py-seg
screen 
conda activate improc
celery -A MXtasksTempo worker --concurrency=120
<ctrl-A,D>
python sendMXtempoJobs.py 2023-04-04_BRAFi_P2RX7i_task_args.csv
```
---
## Assemble Experiment Output

### Get Cellcounts

In [26]:
SEG_DIR

'/mnt/monica/quaranta2/Cellcyte/2023-03-28_0FB0444E/segmentation'

In [27]:
os.chdir(SEG_DIR)

cellcounts = pd.DataFrame()
dirs = os.listdir(SEG_DIR)
dirs.sort()

for dn in dirs:
    print(dn)
    if dn == '.DS_Store': continue
    temp =  pd.DataFrame()
    files = os.listdir(dn)
    # get cellcount files only
    cc_files = [f for f in files if "csv" in f]

    # append all cc files into df
    for f in cc_files:
        t = pd.read_csv(os.path.join(SEG_DIR, str(dn), f))
        
        # parse position/image time from file_name
        parsed = parseFileName(t['file_name'][0])
        t['position'] = parsed[2]
        t['image_time'] = parsed[3]
        temp = temp.append(t, ignore_index=True)
    cellcounts = cellcounts.append(temp)

# sort by well
cellcounts.sort_values(by=['well', 'position'], inplace=True, ignore_index=True)

cellcounts

Plate1


Unnamed: 0,file_name,cell_count,file_name_ch2,ch2_pos,plate_id,well,position,image_time
0,S4_B2_P1_10X_C-R_20230328-163348_Z_-2.282204.png,30,S4_B2_P1_10X_C-G_20230328-163350_Z_-2.270204.png,0,1,B02,1,2023-03-28 16:33
1,S4_B2_P1_10X_C-R_20230329-063333_Z_-2.260525.png,29,S4_B2_P1_10X_C-G_20230329-063335_Z_-2.272525.png,2,1,B02,1,2023-03-29 06:33
2,S4_B2_P1_10X_C-R_20230329-133333_Z_-2.260210.png,43,S4_B2_P1_10X_C-G_20230329-133335_Z_-2.272210.png,3,1,B02,1,2023-03-29 13:33
3,S4_B2_P1_10X_C-R_20230330-033333_Z_-2.260400.png,51,S4_B2_P1_10X_C-G_20230330-033335_Z_-2.272400.png,3,1,B02,1,2023-03-30 03:33
4,S4_B2_P1_10X_C-R_20230329-203335_Z_-2.272612.png,32,S4_B2_P1_10X_C-G_20230329-203337_Z_-2.260612.png,3,1,B02,1,2023-03-29 20:33
...,...,...,...,...,...,...,...,...
4027,S4_G9_P4_10X_C-R_20230401-185432_Z_-2.011656.png,142,S4_G9_P4_10X_C-G_20230401-185434_Z_-2.023656.png,20,1,G09,4,2023-04-01 18:54
4028,S4_G9_P4_10X_C-R_20230328-170223_Z_-2.017941.png,31,S4_G9_P4_10X_C-G_20230328-170225_Z_-2.005941.png,1,1,G09,4,2023-03-28 17:02
4029,S4_G9_P4_10X_C-R_20230328-235338_Z_-2.002242.png,30,S4_G9_P4_10X_C-G_20230328-235340_Z_-2.014242.png,1,1,G09,4,2023-03-28 23:53
4030,S4_G9_P4_10X_C-R_20230330-035329_Z_-2.004844.png,17,S4_G9_P4_10X_C-G_20230330-035331_Z_-2.016844.png,0,1,G09,4,2023-03-30 03:53


In [28]:
cellcounts.to_csv(f'{SAVE_DIR}/{EXP_NAME}_cell_counts.csv', index=False)

### Calculate time (hours)

The time represents the elapsed time in hours from the time drug was added

In [29]:
DRUG

'2023-03-28 11:30'

In [30]:
# get time (hours)
# drug added time
timeX = datetime.strptime(DRUG, '%Y-%m-%d %H:%M')
img_time = pd.to_datetime(temp['image_time'])
diff = img_time - timeX
diff

0      0 days 05:03:00
1      0 days 19:03:00
2      1 days 02:03:00
3      1 days 16:03:00
4      1 days 09:03:00
             ...      
4027   4 days 07:24:00
4028   0 days 05:32:00
4029   0 days 12:23:00
4030   1 days 16:23:00
4031   5 days 04:23:00
Name: image_time, Length: 4032, dtype: timedelta64[ns]

In [31]:
# Get interval between two timestamps in hours
hours = []
for i in diff:
    hours.append(round(i.total_seconds() / 3600, 1))

print(len(hours))

4032


In [32]:
final_df = pd.DataFrame({
    "expt.id": EXP_NAME,
    "expt.date": EXP_DATE,
    "plate.id": EXP,
    "well": temp['well'],
    "position": temp['position'],
    "image.time": temp['image_time'],
    "time": hours
})

final_df

Unnamed: 0,expt.id,expt.date,plate.id,well,position,image.time,time
0,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-28 16:33,5.0
1,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 06:33,19.1
2,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 13:33,26.1
3,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-30 03:33,40.0
4,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 20:33,33.0
...,...,...,...,...,...,...,...
4027,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-04-01 18:54,103.4
4028,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 17:02,5.5
4029,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 23:53,12.4
4030,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-30 03:53,40.4


### Add Platemap Data

In [33]:
wells = temp['well'].unique()
wells.tolist()

['B02',
 'B03',
 'B04',
 'B05',
 'B06',
 'B07',
 'B08',
 'B09',
 'C02',
 'C03',
 'C04',
 'C05',
 'C06',
 'C07',
 'C08',
 'C09',
 'D02',
 'D03',
 'D04',
 'D05',
 'D06',
 'D07',
 'D08',
 'D09',
 'E02',
 'E03',
 'E04',
 'E05',
 'E06',
 'E07',
 'E08',
 'E09',
 'F02',
 'F03',
 'F04',
 'F05',
 'F06',
 'F07',
 'F08',
 'F09',
 'G02',
 'G03',
 'G04',
 'G05',
 'G06',
 'G07',
 'G08',
 'G09']

In [34]:
# selecting wells in use
pmap = pmap[pmap['well'].isin(wells)]

In [36]:
# convert drug conc NaN to 0
pmap['drug1.conc'].fillna(0, inplace=True)
pmap['drug2.conc'].fillna(0, inplace=True)
pmap

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


Unnamed: 0,well,cell.line,drug1,drug1.conc,drug1.units,drug2,drug2.conc,drug2.units
13,B02,A375,A-74003,5e-06,M,,0.0,M
14,B03,A375,A-74003,5e-06,M,,0.0,M
15,B04,A375,A-74003,5e-06,M,,0.0,M
16,B05,A375,A-74003,5e-06,M,PLX4720,8e-06,M
17,B06,A375,A-74003,5e-06,M,PLX4720,8e-06,M
18,B07,A375,A-74003,5e-06,M,PLX4720,8e-06,M
19,B08,A375,,0.0,M,PLX4720,8e-06,M
20,B09,A375,,0.0,M,,0.0,M
25,C02,A375,A-74003,1.25e-06,M,,0.0,M
26,C03,A375,A-74003,1.25e-06,M,,0.0,M


In [37]:
drug_info = pd.DataFrame()

# add drug info for every row in dataframe
def add_drug_info(well):
    row = pmap.loc[pmap.well == well]
    return row.iloc[0, 1:]

drug_info = final_df['well'].apply(add_drug_info)
drug_info

Unnamed: 0,cell.line,drug1,drug1.conc,drug1.units,drug2,drug2.conc,drug2.units
0,A375,A-74003,0.000005,M,,0.0,M
1,A375,A-74003,0.000005,M,,0.0,M
2,A375,A-74003,0.000005,M,,0.0,M
3,A375,A-74003,0.000005,M,,0.0,M
4,A375,A-74003,0.000005,M,,0.0,M
...,...,...,...,...,...,...,...
4027,A375,,0.000000,M,,0.0,M
4028,A375,,0.000000,M,,0.0,M
4029,A375,,0.000000,M,,0.0,M
4030,A375,,0.000000,M,,0.0,M


In [38]:
# merge the dataframes
final_df = pd.concat([final_df, drug_info], axis=1)
final_df

Unnamed: 0,expt.id,expt.date,plate.id,well,position,image.time,time,cell.line,drug1,drug1.conc,drug1.units,drug2,drug2.conc,drug2.units
0,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-28 16:33,5.0,A375,A-74003,0.000005,M,,0.0,M
1,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 06:33,19.1,A375,A-74003,0.000005,M,,0.0,M
2,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 13:33,26.1,A375,A-74003,0.000005,M,,0.0,M
3,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-30 03:33,40.0,A375,A-74003,0.000005,M,,0.0,M
4,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 20:33,33.0,A375,A-74003,0.000005,M,,0.0,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4027,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-04-01 18:54,103.4,A375,,0.000000,M,,0.0,M
4028,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 17:02,5.5,A375,,0.000000,M,,0.0,M
4029,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 23:53,12.4,A375,,0.000000,M,,0.0,M
4030,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-30 03:53,40.4,A375,,0.000000,M,,0.0,M


### Add Cellcount Data

In [39]:
final_df['cell.count'] = temp['cell_count']
final_df['ch2.pos'] = temp['ch2_pos']
final_df['upid'] = EXP
final_df

Unnamed: 0,expt.id,expt.date,plate.id,well,position,image.time,time,cell.line,drug1,drug1.conc,drug1.units,drug2,drug2.conc,drug2.units,cell.count,ch2.pos,upid
0,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-28 16:33,5.0,A375,A-74003,0.000005,M,,0.0,M,30,0,0FB0444E
1,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 06:33,19.1,A375,A-74003,0.000005,M,,0.0,M,29,2,0FB0444E
2,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 13:33,26.1,A375,A-74003,0.000005,M,,0.0,M,43,3,0FB0444E
3,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-30 03:33,40.0,A375,A-74003,0.000005,M,,0.0,M,51,3,0FB0444E
4,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,B02,1,2023-03-29 20:33,33.0,A375,A-74003,0.000005,M,,0.0,M,32,3,0FB0444E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4027,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-04-01 18:54,103.4,A375,,0.000000,M,,0.0,M,142,20,0FB0444E
4028,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 17:02,5.5,A375,,0.000000,M,,0.0,M,31,1,0FB0444E
4029,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-28 23:53,12.4,A375,,0.000000,M,,0.0,M,30,1,0FB0444E
4030,20230328_co-upfront_BRAFi+P2RX7i,2023-03-28,0FB0444E,G09,4,2023-03-30 03:53,40.4,A375,,0.000000,M,,0.0,M,17,0,0FB0444E


In [40]:
final_df.to_csv(f'{SAVE_DIR}/{EXP_NAME}_dataset.csv', index=False)