### Reformat LPR behavioral data to have t0-t239
### While reformatting, split the data into a 240-timepoint set and a 15-timepoint set

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, random, time
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Record the directory the kernel is running in and echo it for the reader.
starting_dir = os.getcwd()
print(starting_dir)

/Users/kimd999/research/script_not_in_dropbox/srpAnalytics/analysis/latest/1_reformat/all_targets/LPR


In [3]:
# Input selection: keep exactly one `complete_input_file_path` assignment active.
# The commented-out assignments are alternative inputs (other machine / other phase)
# that are switched by hand.

# mac - phase I & II - LPR - 240 endpoints
complete_input_file_path= '/Users/kimd999/research/projects/toxicity/per_each_data/phase_I_II/input/LPR/latest/after_merging/tall/344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged.csv'
# -> 196 unique chemical IDs
# NOTE(review): the saved output of a later cell reports 344 unique chemical.id values
# for this file, so the count above may be stale - confirm.

# mac - phase III - full
#complete_input_file_path= '/Users/kimd999/research/projects/toxicity/per_each_data/phase_III/input/original/behavior/LPR/Tanguay_Phase_3_zf_LPR_data_PNNL_2021MAR23.csv'
# -> 240 unique variables and 215 unique chemical IDs

# constance - phase I & II - LPR - 240 endpoints
#complete_input_file_path= '/people/kimd999/tox/phase_I_II/LPR/input/tall/after_Lisa_fix/344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_full_w_240_endpoints.csv'

# constance - phase III - full
#complete_input_file_path= '/people/kimd999/tox/phase_III/Tanguay_Phase_3_zf_LPR_data_PNNL_2021MAR23.csv'

In [4]:
# Load the tall-format table; the first CSV row holds the column names.
df_behav = pd.read_csv(complete_input_file_path, header=0)

# Whichever of 'endpoint' / 'variable' is present becomes 'timepoint'
# (presumably the label column is named differently across input files - confirm).
df_behav = df_behav.rename(columns={'endpoint': 'timepoint', 'variable': 'timepoint'})

# Cast ids to str: later cells compare them with string literals, e.g.
# "df_select_1846 = df_select.loc[df_select['chemical.id'] == '1846',:]".
df_behav['chemical.id'] = df_behav['chemical.id'].astype(str)

display(df_behav.head())
display(df_behav.tail())

Unnamed: 0,chemical.id,conc,plate.id,well,timepoint,value
0,1030,,,,t0,
1,1030,0.0,9414.0,A06,t0,0.0
2,1030,0.0,9414.0,A12,t0,2.7
3,1030,0.0,9414.0,B06,t0,0.5
4,1030,0.0,9414.0,B12,t0,0.0


Unnamed: 0,chemical.id,conc,plate.id,well,timepoint,value
11610475,998,67.0,15822.0,A08,t239,0.0
11610476,998,67.0,15822.0,A09,t239,0.0
11610477,998,67.0,15822.0,A10,t239,0.0
11610478,998,67.0,15822.0,A11,t239,0.0
11610479,998,67.0,15822.0,A12,t239,0.0


In [5]:
# Narrow the table to the identifier, label and measurement columns used downstream.
columns_to_keep = ['chemical.id', 'conc', 'plate.id', 'well', 'timepoint', 'value']
df_select = df_behav[columns_to_keep]

# Summarise how many distinct chemicals and timepoint labels the selection holds.
n_unique_chemicals = len(np.unique(df_select['chemical.id']))
n_unique_timepoints = len(np.unique(df_select['timepoint']))
display("number of unique chemical.id:" + str(n_unique_chemicals))
display("number of unique timepoints:" + str(n_unique_timepoints))

'number of unique chemical.id:344'

'number of unique timepoints:240'

In [6]:
# Preview the records whose measurement ('value') is missing.
missing_value_mask = df_select['value'].isna()
nan = df_select[missing_value_mask]
display(nan.head())

# [phase III] there is no nan in 'chemical.id', 'conc', 'plate.id', 'well', 'variable'

Unnamed: 0,chemical.id,conc,plate.id,well,timepoint,value
0,1030,,,,t0,
193,1030,,,,t1,
386,1030,,,,t2,
579,1030,,,,t3,
772,1030,,,,t4,


In [None]:
# Disabled step: the code below sits inside a string literal, so none of it executes.
# If re-enabled it would drop every record with any missing field from df_select,
# cast plate.id to int and display summary counts.
'''let me not drop na now for easier proceesing for now

display("before dropna, len(behav_select):"+str(len(df_select)))
df_select = df_select.dropna(how='any')
# phase I & II -> dropped some
# phase III    -> dropped many

display("after dropna,  len(behav_select):"+str(len(df_select)))

display("number of unique chemical.id:" + str(len(np.unique(df_select['chemical.id']))))

df_select['plate.id'] = df_select['plate.id'].astype(int)


display("number of unique plate.id:" + str(len(np.unique(df_select['plate.id']))))
display("unique plate.id:" + str(np.unique(df_select['plate.id'])))

display(df_select.head())'''

In [None]:
# Disabled helper: the code below sits inside a string literal, so none of it executes.
# If re-enabled it would display the records of chemical.id '1967' and '1030' and write
# each subset to a CSV file given by a relative filename.
'''
df_part = df_select[df_select['chemical.id']=='1967']
display(df_part)
output_filename = "chemical_id_1967.csv"
df_part.to_csv(output_filename, index=False)

df_part = df_select[df_select['chemical.id']=='1030']
display(df_part)
output_filename = "chemical_id_1030.csv"
df_part.to_csv(output_filename, index=False)
'''

### Transpose time points 

In [None]:
# old using groupby
#'''
# Wall-clock reference; the end of this cell subtracts it to report the elapsed time.
start_time = time.time()
       
def reformat(chemical_index, df_select, df_reformatted_240_timepoints, df_reformatted_15_timepoints):
    """Turn one chemical's tall records into one wide row per (plate, well).

    Each well becomes a row holding 'chemical.id', 'plate.id', 'well',
    'chemical_plate_well' (the three ids joined with '_') and 'conc', followed by
    one column per timepoint label. A well with exactly 15 distinct timepoint
    labels is read as t3..t17 and goes to the 15-timepoint table; every other
    well is read as t0..t(n-1) and goes to the 240-timepoint table.

    Parameters
    ----------
    chemical_index
        Value compared with ``df_select['chemical.id']`` to pick the records.
    df_select : pandas.DataFrame
        Tall table with the columns 'chemical.id', 'conc', 'plate.id', 'well',
        'timepoint' and 'value'.
    df_reformatted_240_timepoints, df_reformatted_15_timepoints : pandas.DataFrame
        Wide tables accumulated by previous calls (empty frames on the first call).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The two accumulators with this chemical's wells appended, in sorted
        'chemical_plate_well' order. Row labels are not reset.

    Raises
    ------
    ValueError
        If a single well carries more than one distinct 'conc' value.
    """
    df_per_chemical = df_select.loc[df_select['chemical.id'] == chemical_index, :]

    wells_240 = []  # (chemical_plate_well, one-well wide frame)
    wells_15 = []

    # A single groupby pass replaces the per-well boolean masks. groupby skips
    # records whose plate.id or well is missing, so placeholder rows never form a well.
    for (chemical_id, plate_id, well), group in df_per_chemical.groupby(['chemical.id', 'plate.id', 'well']):
        cpw = '_'.join(str(part) for part in (chemical_id, plate_id, well))

        concentrations = group['conc'].unique()
        if len(concentrations) != 1:
            raise ValueError("well " + cpw + " has " + str(len(concentrations))
                             + " distinct conc values; expected exactly one")

        n_timepoints = group['timepoint'].nunique()
        timepoints_15 = (n_timepoints == 15)
        # 15-timepoint wells are labelled t3..t17 rather than t0..t14.
        first_time_point = 3 if timepoints_15 else 0

        # Bucket the measurements by label in one pass over the well's records.
        values_by_label = {}
        for label, value in zip(group['timepoint'], group['value']):
            values_by_label.setdefault(label, []).append(value)

        value_dtype = group['value'].dtype
        columns = {
            'chemical.id': pd.Series([chemical_id]),
            'plate.id': pd.Series([plate_id]),
            'well': pd.Series([well]),
            'chemical_plate_well': pd.Series([cpw]),
            'conc': pd.Series(concentrations),
        }
        for time_point in range(first_time_point, first_time_point + n_timepoints):
            timepoint = 't' + str(time_point)
            # A label absent from the data yields an empty Series, i.e. NaN in the row.
            columns[timepoint] = pd.Series(values_by_label.get(timepoint, []), dtype=value_dtype)

        # Build the whole row at once instead of one pd.concat per timepoint column.
        concat_this = pd.DataFrame(columns)
        (wells_15 if timepoints_15 else wells_240).append((cpw, concat_this))

    # Emit wells in sorted chemical_plate_well order, matching the previous row order.
    wells_240.sort(key=lambda item: item[0])
    wells_15.sort(key=lambda item: item[0])

    # One concat per accumulator per call instead of one per well.
    if wells_240:
        df_reformatted_240_timepoints = pd.concat(
            [df_reformatted_240_timepoints] + [frame for _, frame in wells_240])
    if wells_15:
        df_reformatted_15_timepoints = pd.concat(
            [df_reformatted_15_timepoints] + [frame for _, frame in wells_15])

    return df_reformatted_240_timepoints, df_reformatted_15_timepoints
########### end of def reformat(chemical_index, df_select, df_reformatted_240_timepoints, df_reformatted_15_timepoints)


# Accumulators that reformat() extends one chemical at a time.
df_reformatted_240_timepoints = pd.DataFrame()
df_reformatted_15_timepoints = pd.DataFrame()

# "full" processes every chemical; "devel" processes a small random sample.
full_devel = "full"
#full_devel = "devel"

chemical_id_from_here = np.unique(df_behav['chemical.id'])

if (full_devel == "devel"):
    # random.sample() needs a sequence: passing a set raises TypeError on Python 3.11+.
    # The sample size is capped so inputs with fewer than 2 chemicals still work.
    candidate_ids = list(chemical_id_from_here)
    chemical_id_from_here = random.sample(candidate_ids, min(2, len(candidate_ids)))

#chemical_id_from_here = ['1030', '1119']
# 1119 chemical.id ->  15 timepoints
# 1030 chemical.id -> 240 timepoints

total_number_of_chemicals_to_processed = len(chemical_id_from_here)
number_of_chemicals_processed = 0

for chemical_index in chemical_id_from_here:
    print("\nchemical_index:" + str(chemical_index))

    df_reformatted_240_timepoints, df_reformatted_15_timepoints \
    = reformat(chemical_index, df_select, df_reformatted_240_timepoints, df_reformatted_15_timepoints)

    number_of_chemicals_processed += 1
    print_this = str(number_of_chemicals_processed) + " chemicals processed out of " + str(total_number_of_chemicals_to_processed)
    print(print_this)

    #display('number of unique chemical.id:', str(len(np.unique(df_reformatted['chemical.id']))))

    # Timestamp each chemical so slow ones can be spotted in the log.
    now = datetime.now()

    current_time = now.strftime("%H:%M:%S")
    print("Current Time =", current_time)

end_time = time.time()
time_took = str(round((end_time-start_time), 1)) + " seconds"
print ("Transposing time points is done. It took " + str(time_took))
# took 75 seconds in pnnl laptop for 1 chemical
# took 5~7 hrs in pnnl laptop for 196 chemicals
#'''


chemical_index:1030
1 chemicals processed out of 344
Current Time = 18:22:09

chemical_index:1031
2 chemicals processed out of 344
Current Time = 18:23:17

chemical_index:1105
3 chemicals processed out of 344
Current Time = 18:25:26

chemical_index:1114
4 chemicals processed out of 344
Current Time = 18:27:44

chemical_index:1119
5 chemicals processed out of 344
Current Time = 18:27:51

chemical_index:1161
6 chemicals processed out of 344
Current Time = 18:30:12

chemical_index:1163
7 chemicals processed out of 344
Current Time = 18:31:27

chemical_index:1166
8 chemicals processed out of 344
Current Time = 18:32:55

chemical_index:1167
9 chemicals processed out of 344
Current Time = 18:33:02

chemical_index:1171
10 chemicals processed out of 344
Current Time = 18:34:36

chemical_index:1172
11 chemicals processed out of 344
Current Time = 18:36:57

chemical_index:1174
12 chemicals processed out of 344
Current Time = 18:40:56

chemical_index:1175
13 chemicals processed out of 344
Curren

In [18]:
# Derive the output prefix by removing the input file's extension. splitext is used
# instead of a fixed 4-character slice so the prefix stays correct when the input
# extension is not exactly four characters long.
output_prefix = os.path.splitext(complete_input_file_path)[0]

# Wide table for wells with the full timepoint series (t0..t239).
reformatted_data_filename = output_prefix + "_wide_t0_t239_" + str(full_devel) + ".csv"
display ("reformatted_data_filename:", reformatted_data_filename)
df_reformatted_240_timepoints.to_csv(reformatted_data_filename, index=False)

# Wide table for wells with the 15-timepoint series (t3..t17).
reformatted_data_filename = output_prefix + "_wide_t3_t17_" + str(full_devel) + ".csv"
display ("reformatted_data_filename:", reformatted_data_filename)
df_reformatted_15_timepoints.to_csv(reformatted_data_filename, index=False)

'reformatted_data_filename:'

'/Users/kimd999/research/projects/toxicity/per_each_data/phase_I_II/input/LPR/latest/after_merging/tall/344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged_wide_t0_t239_full.csv'

'reformatted_data_filename:'

'/Users/kimd999/research/projects/toxicity/per_each_data/phase_I_II/input/LPR/latest/after_merging/tall/344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged_wide_t3_t17_full.csv'

## The cells below are investigational

In [15]:
# NOTE(review): `df_reformatted` is not assigned by any cell visible in this notebook;
# this cell presumably relies on leftover kernel state - confirm before re-running.
# Show the first rows, the last rows and the dimensions of the wide table.
display(df_reformatted.head(), df_reformatted.tail(), df_reformatted.shape)

Unnamed: 0,chemical.id,plate.id,well,chemical_plate_well,conc,t0,t1,t2,t3,t4,...,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239
0,1030,9414.0,A01,1030_9414.0_A01,50.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1030,9414.0,A02,1030_9414.0_A02,35.6,0.0,1.2,0.0,0.0,0.0,...,0.0,0.0,3.1,12.7,16.3,11.7,13.9,16.0,20.0,18.6
0,1030,9414.0,A03,1030_9414.0_A03,11.2,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.6,6.4,7.1,7.6,12.2,13.7,5.5,0.0
0,1030,9414.0,A04,1030_9414.0_A04,5.0,0.0,1.7,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1030,9414.0,A05,1030_9414.0_A05,1.0,3.0,0.0,0.0,0.0,0.0,...,1.8,14.4,24.2,29.3,16.7,30.8,2.3,0.0,6.4,8.7


Unnamed: 0,chemical.id,plate.id,well,chemical_plate_well,conc,t0,t1,t2,t3,t4,...,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239
0,1119,2.0,H5,1119_2.0_H5,0.0064,,,,19.7,33.0,...,,,,,,,,,,
0,1119,2.0,H6,1119_2.0_H6,0.0,,,,0.3,0.0,...,,,,,,,,,,
0,1119,2.0,H7,1119_2.0_H7,64.0,,,,64.6,57.6,...,,,,,,,,,,
0,1119,2.0,H8,1119_2.0_H8,6.4,,,,17.5,0.0,...,,,,,,,,,,
0,1119,2.0,H9,1119_2.0_H9,0.64,,,,17.5,21.9,...,,,,,,,,,,


(384, 245)

In [12]:
# NOTE(review): `df_reformatted` is not assigned by any cell visible in this notebook;
# presumably leftover kernel state - confirm before re-running.
# Compose the output name from the input path without its last four characters.
reformatted_data_filename = "".join([
    str(complete_input_file_path[:-4]),
    "_wide_t0_t239_",
    str(full_devel),
    "_1030.csv",
])
display("reformatted_data_filename:", reformatted_data_filename)
df_reformatted.to_csv(reformatted_data_filename, index=False)

'reformatted_data_filename:'

'/Users/kimd999/research/projects/toxicity/per_each_data/phase_I_II/input/LPR/latest/after_merging/tall/344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged_wide_t0_t239_full_1030.csv'

In [36]:
# NOTE(review): `df_reformatted` is not assigned by any cell visible in this notebook;
# presumably leftover kernel state - confirm before re-running.
display("before dropna, len(reformatted):"+str(len(df_reformatted)))
display("number of unique chemical.id:" + str(len(np.unique(df_reformatted['chemical.id']))))
display("unique chemical.id:" + str(np.unique(df_reformatted['chemical.id'])))


# Remove every wide row that has at least one missing field.
df_reformatted = df_reformatted.dropna(how='any')
# Measure the frame that was just filtered. This line previously read `reformatted`,
# a different variable, so the reported length did not reflect the dropna above.
display("after dropna,  len(reformatted):"+str(len(df_reformatted)))

# With the missing values gone, plate.id can be stored as int.
df_reformatted['plate.id'] = df_reformatted['plate.id'].astype(int)

display(df_reformatted.head())

# phase I & II -> dropped some
# phase III    -> dropped many

# Sanity check: lists any row whose plate.id is still missing.
display(df_reformatted[df_reformatted['plate.id'].isna()])

#reformatted['plate.id'] = reformatted['plate.id'].astype(int)

'before dropna, len(reformatted):32251'

'number of unique chemical.id:134'

"unique chemical.id:['1031' '1105' '1114' '1161' '1163' '1166' '1171' '1172' '1174' '1175'\n '1177' '1182' '1211' '1221' '129' '1307' '1354' '1370' '1371' '155'\n '1595' '1603' '1611' '1612' '1613' '1614' '1616' '1619' '1620' '1629'\n '1651' '1670' '1770' '1854' '1861' '1930' '1965' '1967' '2058' '2067'\n '2142' '220' '2323' '246' '247' '248' '251' '252' '2532' '2601' '2635'\n '2637' '2739' '2740' '2751' '2770' '2778' '281' '2813' '283' '285' '296'\n '2973' '3002' '3004' '3005' '3091' '3102' '3114' '3138' '3144' '3147'\n '3151' '3273' '3411' '3412' '3413' '360' '365' '3689' '3690' '3757'\n '3758' '3759' '3760' '381' '384' '3859' '3860' '392' '409' '412' '418'\n '441' '500' '501' '502' '503' '504' '514' '517' '54' '56' '58' '596'\n '597' '601' '602' '61' '618' '62' '628' '63' '64' '65' '667' '674' '689'\n '691' '746' '747' '769' '770' '771' '774' '775' '778' '946' '947' '948'\n '952' '961' '966' '998']"

'after dropna,  len(reformatted):32251'

Unnamed: 0,chemical.id,plate.id,well,chemical_plate_well,conc,t0,t1,t2,t3,t4,...,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239
0,1031,13522,A01,1031_13522.0_A01,50.0,19.523,13.706,2.196,0.0,12.619,...,9.135,14.572,11.551,0.415,1.246,8.556,14.134,19.201,30.299,20.82
0,1031,13522,A02,1031_13522.0_A02,35.6,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A03,1031_13522.0_A03,11.2,0.779,3.014,17.207,11.661,0.699,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A04,1031_13522.0_A04,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A05,1031_13522.0_A05,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,chemical.id,plate.id,well,chemical_plate_well,conc,t0,t1,t2,t3,t4,...,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239


In [30]:
# NOTE(review): `df_reformatted` is not assigned by any cell visible in this notebook;
# presumably leftover kernel state - confirm before re-running.
display(df_reformatted.head(), df_reformatted.shape)

# Distinct chemicals present in the wide table: first the count, then the ids.
unique_chemical_ids = np.unique(df_reformatted['chemical.id'])
display("number of unique chemical.id:" + str(len(unique_chemical_ids)))
display("unique chemical.id:" + str(unique_chemical_ids))

Unnamed: 0,chemical.id,plate.id,well,chemical_plate_well,conc,t0,t1,t2,t3,t4,...,t230,t231,t232,t233,t234,t235,t236,t237,t238,t239
0,1031,13522,A01,1031_13522.0_A01,50.0,19.523,13.706,2.196,0.0,12.619,...,9.135,14.572,11.551,0.415,1.246,8.556,14.134,19.201,30.299,20.82
0,1031,13522,A02,1031_13522.0_A02,35.6,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A03,1031_13522.0_A03,11.2,0.779,3.014,17.207,11.661,0.699,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A04,1031_13522.0_A04,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1031,13522,A05,1031_13522.0_A05,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


(32251, 245)

'number of unique chemical.id:134'

"unique chemical.id:['1031' '1105' '1114' '1161' '1163' '1166' '1171' '1172' '1174' '1175'\n '1177' '1182' '1211' '1221' '129' '1307' '1354' '1370' '1371' '155'\n '1595' '1603' '1611' '1612' '1613' '1614' '1616' '1619' '1620' '1629'\n '1651' '1670' '1770' '1854' '1861' '1930' '1965' '1967' '2058' '2067'\n '2142' '220' '2323' '246' '247' '248' '251' '252' '2532' '2601' '2635'\n '2637' '2739' '2740' '2751' '2770' '2778' '281' '2813' '283' '285' '296'\n '2973' '3002' '3004' '3005' '3091' '3102' '3114' '3138' '3144' '3147'\n '3151' '3273' '3411' '3412' '3413' '360' '365' '3689' '3690' '3757'\n '3758' '3759' '3760' '381' '384' '3859' '3860' '392' '409' '412' '418'\n '441' '500' '501' '502' '503' '504' '514' '517' '54' '56' '58' '596'\n '597' '601' '602' '61' '618' '62' '628' '63' '64' '65' '667' '674' '689'\n '691' '746' '747' '769' '770' '771' '774' '775' '778' '946' '947' '948'\n '952' '961' '966' '998']"

In [None]:
# new, not using groupby
# NOTE(review): the reformat() defined in this cell still calls groupby(), and running
# this cell replaces the four-argument reformat() defined earlier in the notebook.
#'''
# Wall-clock reference; the end of this cell subtracts it to report the elapsed time.
start_time = time.time()
       
def reformat(chemical_index, df_select, reformatted):
    """Turn one chemical's tall records into one wide row per (plate, well).

    Each well becomes a row holding 'chemical.id', 'plate.id', 'well',
    'chemical_plate_well' (the three ids joined with '_') and 'conc', followed by
    one column per timepoint label. A well with exactly 15 distinct timepoint
    labels is read as t3..t17; every other well is read as t0..t(n-1). All wells
    are appended to the same table, so columns a well lacks are left as NaN.

    Parameters
    ----------
    chemical_index
        Value compared with ``df_select['chemical.id']`` to pick the records.
    df_select : pandas.DataFrame
        Tall table with the columns 'chemical.id', 'conc', 'plate.id', 'well',
        'timepoint' and 'value'.
    reformatted : pandas.DataFrame
        Wide table accumulated by previous calls (an empty frame on the first call).

    Returns
    -------
    pandas.DataFrame
        ``reformatted`` with this chemical's wells appended in sorted
        'chemical_plate_well' order. Row labels are not reset.

    Raises
    ------
    ValueError
        If a single well carries more than one distinct 'conc' value.
    """
    df_per_chemical = df_select.loc[df_select['chemical.id'] == chemical_index, :]

    wide_wells = []  # (chemical_plate_well, one-well wide frame)

    # A single groupby pass replaces the per-well boolean masks. groupby skips
    # records whose plate.id or well is missing, so placeholder rows never form a well.
    for (chemical_id, plate_id, well), group in df_per_chemical.groupby(['chemical.id', 'plate.id', 'well']):
        cpw = '_'.join(str(part) for part in (chemical_id, plate_id, well))

        concentrations = group['conc'].unique()
        if len(concentrations) != 1:
            raise ValueError("well " + cpw + " has " + str(len(concentrations))
                             + " distinct conc values; expected exactly one")

        n_timepoints = group['timepoint'].nunique()
        # 15-timepoint wells are labelled t3..t17 rather than t0..t14.
        first_time_point = 3 if n_timepoints == 15 else 0

        # Bucket the measurements by label in one pass over the well's records.
        values_by_label = {}
        for label, value in zip(group['timepoint'], group['value']):
            values_by_label.setdefault(label, []).append(value)

        value_dtype = group['value'].dtype
        columns = {
            'chemical.id': pd.Series([chemical_id]),
            'plate.id': pd.Series([plate_id]),
            'well': pd.Series([well]),
            'chemical_plate_well': pd.Series([cpw]),
            'conc': pd.Series(concentrations),
        }
        for time_point in range(first_time_point, first_time_point + n_timepoints):
            timepoint = 't' + str(time_point)
            # A label absent from the data yields an empty Series, i.e. NaN in the row.
            columns[timepoint] = pd.Series(values_by_label.get(timepoint, []), dtype=value_dtype)

        # Build the whole row at once instead of one pd.concat per timepoint column.
        wide_wells.append((cpw, pd.DataFrame(columns)))

    # Emit wells in sorted chemical_plate_well order, matching the previous row order.
    wide_wells.sort(key=lambda item: item[0])

    # One concat per call instead of one per well.
    if wide_wells:
        reformatted = pd.concat([reformatted] + [frame for _, frame in wide_wells])
    return reformatted
########### end of def reformat(chemical_index, df_select, reformatted)


# Accumulator that reformat() extends one chemical at a time.
reformatted = pd.DataFrame()

# "full" processes every chemical; "devel" processes one randomly chosen chemical.
full_devel = "full"
#full_devel = "devel"

chemical_id_from_here = np.unique(df_behav['chemical.id'])

if (full_devel == "devel"):
    # random.sample() needs a sequence: passing a set raises TypeError on Python 3.11+.
    # The sample size is capped so an empty id list does not raise.
    candidate_ids = list(chemical_id_from_here)
    chemical_id_from_here = random.sample(candidate_ids, min(1, len(candidate_ids)))

#chemical_id_from_here = ['1846']
#chemical_id_from_here = ['471']

total_number_of_chemicals_to_processed = len(chemical_id_from_here)
number_of_chemicals_processed = 0

for chemical_index in chemical_id_from_here:
    print("chemical_index:" + str(chemical_index))
    reformatted = reformat(chemical_index, df_select, reformatted)

    number_of_chemicals_processed += 1
    print_this = str(number_of_chemicals_processed) + " chemicals processed out of " + str(total_number_of_chemicals_to_processed)
    print(print_this)

end_time = time.time()
time_took = str(round((end_time-start_time), 1)) + " seconds"
print ("Transposing time points is done. It took :"+str(time_took))
# took 5.5 hrs in pnnl laptop for 196 chemicals
#'''

In [None]:
# Disabled check: the code below sits inside a string literal, so none of it executes.
# NOTE(review): it refers to `behav_select` and a 'variable' column, neither of which is
# created by the cells visible in this notebook - presumably names from an older version.
''' # basic check of timepoint #
full_devel = "full"
#full_devel = "devel"

if (full_devel == "full"):
    chemical_id_from_here = np.unique(behav_select['chemical.id'])
else: # full_devel = "devel"
    chemical_id_from_here = np.unique([234])
    
for chemical_index in chemical_id_from_here:
    behav_per_chemical = behav_select.loc[behav_select['chemical.id'] == chemical_index,:]
    print("chemical_index:" + str(chemical_index))

    variables = np.unique(behav_per_chemical['variable'])
    print("variables:" + str(variables))
#    variable_splited = variable.split("t")
    
 #   print (min(variable_splited[1]))
    var_len = len(np.unique(behav_per_chemical['variable']))
    if (var_len != 15):
        display("number of variable:" + str(len(np.unique(behav_per_chemical['variable']))))
'''

In [15]:
# old using groupby
# Disabled cell: everything below sits inside a string literal, so none of it executes.
# It is an earlier single-table version of the transpose step that reads the label
# column under the name 'variable' instead of 'timepoint'.
'''
start_time = time.time()
       
def reformat(chemical_index, df_select, reformatted):
    df_per_chemical = df_select.loc[df_select['chemical.id'] == chemical_index,:]
    #print (behav_per_chemical)

    # Append chemical_plate_well as a unique identifier
    # takes long time (~1 min)
    df_per_chemical.insert(0, 'chemical_plate_well', df_per_chemical.loc[:,['chemical.id','plate.id', 'well']].apply(lambda x: '_'.join(x.map(str)), axis = 1))
    
    for cpw in np.unique(df_per_chemical.chemical_plate_well):
        #print (str(cpw))
        per_cpw = df_per_chemical.loc[df_per_chemical.chemical_plate_well == cpw,:]
        per_cpw_grouped = per_cpw.groupby(['chemical.id', 'plate.id', 'well'])
        for name, group in per_cpw_grouped:
            concat_this = pd.DataFrame(
                    {
                    'chemical.id': np.unique(per_cpw['chemical.id']),
                    'plate.id': np.unique(per_cpw['plate.id']),
                    'well': np.unique(per_cpw['well']),
                    'chemical_plate_well': np.unique(per_cpw['chemical_plate_well']),
                    'conc': np.unique(per_cpw['conc'])
                    })
            
            # rename endpoint columns if this is for 15 endpoints
            for time_point in np.arange(len(np.unique(group.variable))):
                #print ("np.unique(group.variable):"+str(np.unique(group.variable)))
                if (len(np.unique(group.variable)) == 15):
                    time_point = time_point + 3
                variable = 't'+ str(time_point)
                #print ("\nvariable:"+str(variable))
                concat_this = pd.concat([concat_this, pd.DataFrame({variable: per_cpw.value[per_cpw.variable == variable].values})],axis = 1)
            reformatted = pd.concat([reformatted, concat_this])
    return reformatted
########### end of def reformat(chemical_index, behav_select, reformatted):


reformatted = pd.DataFrame()

full_devel = "full"
#full_devel = "devel"

chemical_id_from_here = np.unique(df_behav['chemical.id'])

if (full_devel == "devel"):
    randomly_chosen = random.sample(set(chemical_id_from_here), 1)
    chemical_id_from_here = []
    for i in range(len(randomly_chosen)):
        chemical_id_from_here.append(randomly_chosen[i])

#chemical_id_from_here = ['1846']
#chemical_id_from_here = ['471']

total_number_of_chemicals_to_processed = len(chemical_id_from_here)
number_of_chemicals_processed = 0

for chemical_index in chemical_id_from_here:
    print("chemical_index:" + str(chemical_index))
    reformatted = reformat(chemical_index, df_select, reformatted)
    
    number_of_chemicals_processed += 1
    print_this = str(number_of_chemicals_processed) + " chemicals processed out of " + str(total_number_of_chemicals_to_processed)
    print(print_this)

end_time = time.time()
time_took = str(round((end_time-start_time), 1)) + " seconds"
print ("Transposing time points is done. It took :"+str(time_took)) 
# took 5.5 hrs in pnnl laptop for 196 chemicals
'''

chemical_index:53
1 chemicals processed out of 196
chemical_index:54
2 chemicals processed out of 196
chemical_index:56
3 chemicals processed out of 196
chemical_index:57
4 chemicals processed out of 196
chemical_index:58
5 chemicals processed out of 196
chemical_index:60
6 chemicals processed out of 196
chemical_index:61
7 chemicals processed out of 196
chemical_index:62
8 chemicals processed out of 196
chemical_index:63
9 chemicals processed out of 196
chemical_index:64
10 chemicals processed out of 196
chemical_index:65
11 chemicals processed out of 196
chemical_index:66
12 chemicals processed out of 196
chemical_index:67
13 chemicals processed out of 196
chemical_index:69
14 chemicals processed out of 196
chemical_index:70
15 chemicals processed out of 196
chemical_index:129
16 chemicals processed out of 196
chemical_index:155
17 chemicals processed out of 196
chemical_index:220
18 chemicals processed out of 196
chemical_index:227
19 chemicals processed out of 196
chemical_index:24

In [None]:
# Number of distinct chemicals present in the single wide table.
display(np.unique(reformatted['chemical.id']).size)
#display(len(np.unique(reformatted_w_non_240_endpoints['chemical.id'])))