In [1]:
import pandas as pd
import numpy as np
import math
       
import os, sys, shutil

def print_all_df(df):
    # Permanently changes the pandas settings
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', -1)
    
    # All dataframes hereafter reflect these changes.
    display(df)
    
    print('**RESET_OPTIONS**')
    
    # Resets the options
    pd.reset_option('all')
    


def determine_experimental_number():
    
    num_of_experiments = 0
    num_of_standards = 0
    #go through the well_designation dict and if a reaction type is not a standards, plus 1 to num of experiments
    for well in well_designation.keys():
        if well_designation[well]['Reaction Type'] != 'Standard':
            num_of_experiments +=1
        else:
            num_of_standards +=1

    return num_of_experiments, num_of_standards


def slice_dataframe_based_on_experimental_number(num_of_experiments, num_of_standards, raw_data):
    """"Slices data frame based on the experimental number."""
    """ Assumes gap of three rows between MG and GFP with GFP underneath """
    """ Includes negative control in experimental number """


    chromatic_list = []
    
    #first chromatic
    chromatic_slicefactor = num_of_experiments + metadataheader + num_of_standards  
    chromatic_1_slice = raw_data.iloc[:chromatic_slicefactor,:]
    #append to list 
    chromatic_list.append(chromatic_1_slice)
    
    #iterate over the rest
    for i in range(1, number_of_chromatics_measured,1):
        
        
        old_factor = chromatic_slicefactor + gap
        
        print(old_factor)
        chromatic_slicefactor = chromatic_slicefactor + gap + num_of_experiments + metadataheader + num_of_standards
        chromatic_slice = raw_data.iloc[old_factor : chromatic_slicefactor,:]
        chromatic_list.append(chromatic_slice)
    
    return chromatic_list 



def import_data(filename):
    data = pd.read_csv(filename, header=None)
   

    date = data.iloc[0,1]
    metadata = data.iloc[:3,:3]

    data = data.iloc[5:,:]
    data.reset_index(inplace=True)
    data = data.iloc[:,1:]
    

    keys = ['date', 'metadata','raw_data']
    values = [date, metadata, data]

    data_dict = {keys: values for keys, values in zip(keys, values)}
    return data_dict

# Introduction

This script takes the raw platereader data, determines how many chromatics have been measured, how many minutes it was recording for and moves the columns down into rows.

Enter the exact string in the last time point below:

In [40]:
experiment_id = "ALTE001"
metadata_file_string = experiment_id + "_experiment_metadata.json"
metadata_file_string

'ALTE001_experiment_metadata.json'

In [41]:
import json

os.chdir('/src/experiment_metadata_files')
print(os.getcwd())

# Opening JSON file 
f = open(metadata_file_string) 

# returns JSON object as  
# a dictionary 
data = json.load(f) 

print(data)

/src/experiment_metadata_files
{'move_file': 'True', 'using_standard_curve': 'False', 'save_figures': 'True', 'triplicate': 2, 'number_of_chromatics_measured': 3, 'metadataheader': 2, 'gap': 3, 'chromatics_designations': {'635_680_1': '3000', '485_520_2': '800', '485_520_3': '1500'}, 'metadata': {'Reaction Temperature (°C)': 30, 'Performed by': 'Alex Perkins', 'Instrument': 'BMG POLARstar Omega', 'Experiment #': 8}, 'well_designation': {'B12': {'Reaction Type': 'TXTL', 'System': 'Michael_Lysate_001', 'Energy Solution': 'Michael_ES_001', 'Amplicon DNA Template': 's70_deGFP_MGA', 'Replicate': 1}, 'B13': {'Reaction Type': 'TXTL', 'System': 'Michael_Lysate_001', 'Energy Solution': 'Michael_ES_001', 'Amplicon DNA Template': 's70_deGFP_MGA', 'Replicate': 2}, 'B14': {'Reaction Type': 'TXTL', 'System': 'Michael_Lysate_001', 'Energy Solution': 'Michael_ES_001', 'Amplicon DNA Template': 's70_deGFP_MGA', 'Replicate': 3}, 'B15': {'Reaction Type': 'TXTL', 'System': 'Michael_Lysate_001', 'Energy Sol

In [43]:
end_time_string = "11 h 40 min"

In [46]:


# Import data from CSV
# Import a CSV file for each experiment run

####################################################################
# gets all items in directory

os.getcwd()
os.chdir('/src/data/raw_data_files/')

items = os.listdir(".")

# lists all .csv
csv_list = []
for names in items:
    if names.endswith(".CSV") or names.endswith(".csv"):
        csv_list.append(names)
print(csv_list)
try:
    if(len(csv_list) > 1):
        raise UnAcceptedValueError("More than 1x .CSV file in the directory");
except UnAcceptedValueError as e:
    print ("Received error:", e.data)
    # kills the process
    quit()
##########################################################################################

for f in csv_list:
    
    # get the file name without extention
    f_name, f_ext = os.path.splitext(f)
    
    #if the last three characters are 'raw'
    if f_name[-3:] == 'raw':


        df = pd.read_csv(f, header=None)
        

        df_head = df.iloc[:4,:]
        df_head = df_head.set_axis(np.arange(0,df_head.shape[1],1), axis=1)

        df_samples = df.iloc[5:,:2]

        timelist = df.iloc[6,:]

        timelist = timelist.reset_index(drop=True)


        number_of_chromatics = timelist.value_counts()[0]


        print(timelist[timelist == '0 h '].index)
        print('length of index thing')
        print(len(timelist[timelist == '0 h '].index))
        print('')

        chromatics_list = [df_head]


        for i, v in enumerate(timelist[timelist == '0 h '].index):
            start = v


            if (i+1) >= len(timelist[timelist == '0 h '].index):

                chromatic = df.iloc[4:, start:]
                chromatic.insert(loc=0, column="Content", value=df.iloc[4:,1])
                chromatic.insert(loc=0, column="Well", value=df.iloc[4:,0])
                chromatic = chromatic.set_axis(np.arange(0,chromatic.shape[1],1), axis=1)
                chromatics_list.append(chromatic)

            else:
                end = timelist[timelist == '0 h '].index[i+1]

                chromatic = df.iloc[4:, start:end]
                chromatic.insert(loc=0, column="Content", value=df.iloc[4:,1])
                chromatic.insert(loc=0, column="Well", value=df.iloc[4:,0])
                #add spacer rows
                chromatic = chromatic.append(pd.Series(), ignore_index=True)
                chromatic = chromatic.append(pd.Series(), ignore_index=True)


                chromatic = chromatic.set_axis(np.arange(0,chromatic.shape[1],1), axis=1)
                chromatics_list.append(chromatic)

        final = pd.concat(chromatics_list)

        final.reset_index(inplace=True, drop=True)

        mask = final.iloc[6,:] == end_time_string
        mask = mask.values

        twelve_hr_index = final.iloc[6,mask].index

        twelve_hr_index = twelve_hr_index[0]

        twelve_hr_index = twelve_hr_index + 1

        final = final.iloc[:,:twelve_hr_index]
        
        # making the new name by replacing raw with parsed_dataset
        new_name = f_name[:-3]
        new_name = new_name+'parsed_dataset.csv'
        print(new_name)
        
        #######################################################################

        print(os.getcwd())
        path = "/src/data/parsed_data_files/"
        # make directory for sticking the output in
        if os.path.isdir(path) == False:
            os.mkdir(path, mode=0o777)
        
        os.chdir(path)
        final.to_csv(new_name, header=False, index=False)
        
        
        #navigate home for neatness
        os.chdir('/src')


['ALTE001_raw.csv']
Int64Index([2, 353, 704], dtype='int64')
length of index thing
3

ALTE001_parsed_dataset.csv
/src/data/raw_data_files


  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)
  chromatic = chromatic.append(pd.Series(), ignore_index=True)


# Tidy Data Script

In [48]:
# navigate into processed_data_files and grab the parsed dataset

os.chdir('/src/data/parsed_data_files')
os.getcwd()

items = os.listdir(".")

# lists all .csv
csv_list = []
for names in items:
    if names.endswith(".csv"):
        csv_list.append(names)
print(csv_list)

##########################################################################################

chromatic_name_list_for_saving = []

## get the dataset name
dataset_name, f_ext = os.path.splitext(csv_list[0])
dataset_name = dataset_name[:-14]


for f in csv_list:
    
    # get the file name without extention
    f_name, f_ext = os.path.splitext(f)
    
    #if the last three characters are 'raw'
    if f_name[-14:] == 'parsed_dataset':

        raw_package_dict = import_data(f)
        
        #get raw data from dictionary
        raw_data = raw_package_dict['raw_data']

        # extracting experimental details using the dict
        num_of_experiments, num_of_standards = determine_experimental_number()
        print(num_of_experiments)
            
        
        chromatic_list = slice_dataframe_based_on_experimental_number(num_of_experiments,num_of_standards, raw_data)
        
        

['ALTE001_parsed_dataset.csv']


NameError: name 'well_designation' is not defined

In [16]:

#get time list
time = list(raw_data.iloc[raw_data[raw_data.iloc[:,1]=='Time'].index[0],:])
#delete inital nan

if math.isnan(float(time[0])) == True:
    del time[0]
    del time[0]
    print('Time list is ready, deleted nan and time')
elif time[0] == 'Time':
    del time[0]
    print('Time list is ready, deleted time')
elif time[0] == '0 h ':
    print('Time list is ready')
else:
    print('Time list is weird')


#remove nans
time = [x for x in time if str(x) != 'nan']
#print(time)   

Time list is ready, deleted nan and time


In [17]:
minute_list= []

for i in time:
    #sets the first character of the string as a numeric
    a = int(str(i)[0])

    if len(i) == 4:
        # if the time is a single hour, e.g. '2 h', multiply the first character by 60 and return
        hour = int(str(i)[0]) * 60
        # addition
        total_time = hour
        # add to list
        minute_list.append(total_time)

    if len(i) == 5:
        # if the time is a single hour, e.g. '2 h', multiply the first character by 60 and return
        hour = int(str(i)[0:2]) * 60
        # addition
        total_time = hour
        # add to list
        minute_list.append(total_time)
    
    elif len(i) == 9:
        # multiply the first character by 60
        hour = int(str(i)[0]) * 60
        # save the fifth character
        minute = int(str(i)[4])
        # addition
        total_time = hour + minute
        # add to list
        minute_list.append(total_time)
    
    elif len(i) == 10:
        #if the first number is a less than 10 (1 not 11)
        if i[1].isspace():
            # multiply the first character by 60
            hour = int(str(i)[0]) * 60
            # save the fifth and sixth characters
            minute = int(str(i)[4:6])
            # addition
            total_time = hour + minute
            # add to list
            minute_list.append(total_time)
        else:
            # multiply the first character by 60
            hour = int(str(i)[0:2]) * 60
            # save the fifth and sixth characters
            minute = int(str(i)[5])
            # addition
            total_time = hour + minute
            # add to list
            minute_list.append(total_time)

    elif len(i) == 11:
        # multiply the first character by 60
        hour = int(str(i)[0:2]) * 60
        # save the fifth and sixth characters
        minute = int(str(i)[5:7])
        # addition
        total_time = hour + minute
        # add to list
        minute_list.append(total_time)


minute_list.insert(0,'Time (Mins)')
minute_list.insert(0,'Sample')

In [18]:

#store the chromatic names for saving the data
for i,v in enumerate(chromatic_list):

    
    #get the whole string contatining the chromatic name
    chromatic_name = chromatic_list[i].iloc[0,2]
    
    print(chromatic_name)
    
    # get index of the '(' character in the string'
    chromatic_name_index = chromatic_list[i].iloc[0,2].index('(')
    
    # trim the string to include all the characters after the ( 
    chromatic_name = chromatic_name[chromatic_name_index+1:]
    
    # get rid of the last character )
    chromatic_name = chromatic_name[:-1]
    
    chromatic_name = chromatic_name.replace(" ", "_")
    chromatic_name = chromatic_name.replace("/", "_")
    
    chromatic_name_list_for_saving.append(chromatic_name)

Raw Data (485/520 3)
Raw Data (635/680 1)
Raw Data (485/520 2)


In [19]:
def add_time(df, time_list):

    #check index correct
    df.reset_index(inplace=True)
    df = df.iloc[:,1:]
    
    #delete spectral and time row if necessary
    while df.iloc[0,1] != "Time":
        df = df.iloc[1:,:]
        df.reset_index(inplace=True)
        df = df.iloc[:,1:]

    #insert the minute lists
    df.iloc[0,:] = minute_list

    return df

for i,v in enumerate(chromatic_list):
    chromatic_list[i] = add_time(chromatic_list[i], minute_list)
    
    

In [20]:
def reorder(df):
    
    #get the row that is to be the columns
    column_row = df.iloc[df.loc[df.iloc[:,1]=='Time (Mins)',:].index[0]]
    
    #rename columns
    df.columns = column_row
    
    
    df = df.iloc[1:,:]
    
    # save sample and time for the melt
    keep_these = column_row[:2]
    #drop Sample and Time
    cols_time = column_row[2:]

    df = pd.melt(df,
            
            id_vars=keep_these,
            
            value_vars=cols_time,
            
            var_name='Time (Minutes)',
            value_name='RFUs'
           )
    
    # rename Sample to well and Time (Mins to Sample Id)
    df.rename(columns = {"Sample": "Well", "Time (Mins)":"Sample ID"}, inplace = True)
    
    ##replace in categories list
    #categories[categories.index('Time (Mins)')] = 'Time (Minutes)'
    #categories[categories.index('Sample')] = 'Sample ID'
    #categories = categories + ['Well']

    
    return df


for i,v in enumerate(chromatic_list):
    
        chromatic_list[i] = reorder(chromatic_list[i])
        
        #Add chromatics and gains

        chromatic_list[i]['Gain Setting'] = chromatics_designations[chromatic_name_list_for_saving[i]]
        
        chromatic_list[i]['Chromatic Settings'] = chromatic_name_list_for_saving[i]
        
        chromatic_list[i]['Chromatic Settings'] = chromatic_list[i]['Chromatic Settings'].str.replace('_'+str(i+1), '')

In [21]:
print(chromatic_list[2].columns)
for i, row in chromatic_list[2].iterrows():
    for col in chromatic_list[2].columns:
        print(row[col])

Index(['Well', 'Sample ID', 'Time (Minutes)', 'RFUs', 'Gain Setting',
       'Chromatic Settings'],
      dtype='object')
C03
Sample X1
0
81
800
485_520_2
C06
Sample X2
0
77
800
485_520_2
F03
Sample X3
0
77
800
485_520_2
F06
Sample X4
0
78
800
485_520_2
I03
Sample X5
0
78
800
485_520_2
I06
Sample X6
0
77
800
485_520_2
L03
Sample X7
0
73
800
485_520_2
L06
Sample X8
0
75
800
485_520_2
O03
Sample X9
0
73
800
485_520_2
O06
Sample X10
0
168
800
485_520_2
C09
Sample X11
0
73
800
485_520_2
C12
Sample X12
0
74
800
485_520_2
F09
Sample X13
0
75
800
485_520_2
F12
Sample X14
0
75
800
485_520_2
I09
Sample X15
0
80
800
485_520_2
I12
Sample X16
0
74
800
485_520_2
L09
Sample X17
0
76
800
485_520_2
L12
Sample X18
0
75
800
485_520_2
O09
Sample X19
0
76
800
485_520_2
O12
Sample X20
0
73
800
485_520_2
C15
Sample X21
0
75
800
485_520_2
C18
Sample X22
0
76
800
485_520_2
F15
Sample X23
0
74
800
485_520_2
F18
Sample X24
0
72
800
485_520_2
I15
Sample X25
0
75
800
485_520_2
I18
Sample X26
0
71
800
485_520_2
L1

Sample X1
158
235
800
485_520_2
C06
Sample X2
158
221
800
485_520_2
F03
Sample X3
158
187
800
485_520_2
F06
Sample X4
158
189
800
485_520_2
I03
Sample X5
158
153
800
485_520_2
I06
Sample X6
158
156
800
485_520_2
L03
Sample X7
158
135
800
485_520_2
L06
Sample X8
158
144
800
485_520_2
O03
Sample X9
158
124
800
485_520_2
O06
Sample X10
158
224
800
485_520_2
C09
Sample X11
158
192
800
485_520_2
C12
Sample X12
158
196
800
485_520_2
F09
Sample X13
158
169
800
485_520_2
F12
Sample X14
158
188
800
485_520_2
I09
Sample X15
158
160
800
485_520_2
I12
Sample X16
158
163
800
485_520_2
L09
Sample X17
158
146
800
485_520_2
L12
Sample X18
158
152
800
485_520_2
O09
Sample X19
158
72
800
485_520_2
O12
Sample X20
158
70
800
485_520_2
C15
Sample X21
158
207
800
485_520_2
C18
Sample X22
158
227
800
485_520_2
F15
Sample X23
158
148
800
485_520_2
F18
Sample X24
158
172
800
485_520_2
I15
Sample X25
158
139
800
485_520_2
I18
Sample X26
158
145
800
485_520_2
L15
Sample X27
158
119
800
485_520_2
L18
Sample X28
1

C03
Sample X1
316
269
800
485_520_2
C06
Sample X2
316
254
800
485_520_2
F03
Sample X3
316
218
800
485_520_2
F06
Sample X4
316
222
800
485_520_2
I03
Sample X5
316
183
800
485_520_2
I06
Sample X6
316
184
800
485_520_2
L03
Sample X7
316
160
800
485_520_2
L06
Sample X8
316
164
800
485_520_2
O03
Sample X9
316
148
800
485_520_2
O06
Sample X10
316
196
800
485_520_2
C09
Sample X11
316
223
800
485_520_2
C12
Sample X12
316
229
800
485_520_2
F09
Sample X13
316
193
800
485_520_2
F12
Sample X14
316
223
800
485_520_2
I09
Sample X15
316
188
800
485_520_2
I12
Sample X16
316
195
800
485_520_2
L09
Sample X17
316
179
800
485_520_2
L12
Sample X18
316
182
800
485_520_2
O09
Sample X19
316
74
800
485_520_2
O12
Sample X20
316
70
800
485_520_2
C15
Sample X21
316
241
800
485_520_2
C18
Sample X22
316
268
800
485_520_2
F15
Sample X23
316
171
800
485_520_2
F18
Sample X24
316
204
800
485_520_2
I15
Sample X25
316
166
800
485_520_2
I18
Sample X26
316
177
800
485_520_2
L15
Sample X27
316
138
800
485_520_2
L18
Sample X

466
148
800
485_520_2
L18
Sample X28
466
154
800
485_520_2
O15
Sample X29
466
62
800
485_520_2
O18
Sample X30
466
64
800
485_520_2
C03
Sample X1
468
276
800
485_520_2
C06
Sample X2
468
261
800
485_520_2
F03
Sample X3
468
227
800
485_520_2
F06
Sample X4
468
232
800
485_520_2
I03
Sample X5
468
192
800
485_520_2
I06
Sample X6
468
193
800
485_520_2
L03
Sample X7
468
166
800
485_520_2
L06
Sample X8
468
173
800
485_520_2
O03
Sample X9
468
158
800
485_520_2
O06
Sample X10
468
204
800
485_520_2
C09
Sample X11
468
231
800
485_520_2
C12
Sample X12
468
238
800
485_520_2
F09
Sample X13
468
198
800
485_520_2
F12
Sample X14
468
227
800
485_520_2
I09
Sample X15
468
197
800
485_520_2
I12
Sample X16
468
206
800
485_520_2
L09
Sample X17
468
186
800
485_520_2
L12
Sample X18
468
194
800
485_520_2
O09
Sample X19
468
74
800
485_520_2
O12
Sample X20
468
72
800
485_520_2
C15
Sample X21
468
245
800
485_520_2
C18
Sample X22
468
278
800
485_520_2
F15
Sample X23
468
181
800
485_520_2
F18
Sample X24
468
210
800
48

O03
Sample X9
642
162
800
485_520_2
O06
Sample X10
642
209
800
485_520_2
C09
Sample X11
642
232
800
485_520_2
C12
Sample X12
642
245
800
485_520_2
F09
Sample X13
642
204
800
485_520_2
F12
Sample X14
642
230
800
485_520_2
I09
Sample X15
642
200
800
485_520_2
I12
Sample X16
642
208
800
485_520_2
L09
Sample X17
642
190
800
485_520_2
L12
Sample X18
642
193
800
485_520_2
O09
Sample X19
642
74
800
485_520_2
O12
Sample X20
642
76
800
485_520_2
C15
Sample X21
642
250
800
485_520_2
C18
Sample X22
642
286
800
485_520_2
F15
Sample X23
642
184
800
485_520_2
F18
Sample X24
642
216
800
485_520_2
I15
Sample X25
642
176
800
485_520_2
I18
Sample X26
642
188
800
485_520_2
L15
Sample X27
642
151
800
485_520_2
L18
Sample X28
642
162
800
485_520_2
O15
Sample X29
642
64
800
485_520_2
O18
Sample X30
642
60
800
485_520_2
C03
Sample X1
644
282
800
485_520_2
C06
Sample X2
644
267
800
485_520_2
F03
Sample X3
644
232
800
485_520_2
F06
Sample X4
644
238
800
485_520_2
I03
Sample X5
644
192
800
485_520_2
I06
Sample 

In [22]:

tidy_data = pd.concat(chromatic_list, axis=0, sort=False)
tidy_data = tidy_data.loc[:,~tidy_data.columns.duplicated()]


tidy_data

Unnamed: 0,Well,Sample ID,Time (Minutes),RFUs,Gain Setting,Chromatic Settings
0,C03,Sample X1,0,3181,1500,485_520_3
1,C06,Sample X2,0,3042,1500,485_520_3
2,F03,Sample X3,0,3076,1500,485_520_3
3,F06,Sample X4,0,3066,1500,485_520_3
4,I03,Sample X5,0,3137,1500,485_520_3
...,...,...,...,...,...,...
10525,I18,Sample X26,700,192,800,485_520_2
10526,L15,Sample X27,700,152,800,485_520_2
10527,L18,Sample X28,700,159,800,485_520_2
10528,O15,Sample X29,700,64,800,485_520_2


# Well-Specific Metadata

In [23]:
def get_metadata_categories_from_well_designation(dictionary=well_designation):
    
    meta_data_arr = np.array([])

    for well in dictionary.keys():

        meta_data_arr = np.append(meta_data_arr, np.array(list(dictionary[well].keys())))

    meta_data_arr = np.unique(meta_data_arr)

    
    return meta_data_arr

meta_data_arr = get_metadata_categories_from_well_designation()


In [24]:
def assign_metadata_by_well(df, metadatacolumn):
    
    """takes the metadata given, 
    creates a column then iterates over the wells,
    looks up that well in the well_designation dictionary,
    if that well has the metadata given then it adds it to that well's entry in the new column,
    if not then it leaves it blank.
    returns the dataframe"""
    
    #creates blank column
    df[metadatacolumn] =''

    #iterates over the wells
    for well in df['Well'].unique():
        
        if metadatacolumn in well_designation[well]:
            
            df.loc[df['Well'] == well, metadatacolumn] = well_designation[well][metadatacolumn]
            
        else:
            
            df.loc[df['Well'] == well, metadatacolumn] = ''
    
    return df

for meta in meta_data_arr:
    tidy_data = assign_metadata_by_well(tidy_data, meta)
tidy_data

Unnamed: 0,Well,Sample ID,Time (Minutes),RFUs,Gain Setting,Chromatic Settings,Amplicon DNA Template,Energy Solution,K_Glutamate_mM,Mg_Acetate_mM,Reaction Type,Replicate,System
0,C03,Sample X1,0,3181,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,1,OnePotPure B2
1,C06,Sample X2,0,3042,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,2,OnePotPure B2
2,F03,Sample X3,0,3076,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,1,OnePotPure B2
3,F06,Sample X4,0,3066,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,2,OnePotPure B2
4,I03,Sample X5,0,3137,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,10,TXTL,1,OnePotPure B2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10525,I18,Sample X26,700,192,800,485_520_2,T7_GFP_MGA,Grassman ES,40,10,TXTL,2,OnePotPure B2
10526,L15,Sample X27,700,152,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,1,OnePotPure B2
10527,L18,Sample X28,700,159,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,2,OnePotPure B2
10528,O15,Sample X29,700,64,800,485_520_2,,,,,Blank,1,


# Experiment-Wide MetaData

In [25]:
for meta in metadata.keys():
    tidy_data[meta] = metadata[meta]
tidy_data

Unnamed: 0,Well,Sample ID,Time (Minutes),RFUs,Gain Setting,Chromatic Settings,Amplicon DNA Template,Energy Solution,K_Glutamate_mM,Mg_Acetate_mM,Reaction Type,Replicate,System,Reaction Temperature (°C),Performed by,Instrument,Experiment #
0,C03,Sample X1,0,3181,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
1,C06,Sample X2,0,3042,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
2,F03,Sample X3,0,3076,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
3,F06,Sample X4,0,3066,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
4,I03,Sample X5,0,3137,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,10,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10525,I18,Sample X26,700,192,800,485_520_2,T7_GFP_MGA,Grassman ES,40,10,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
10526,L15,Sample X27,700,152,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
10527,L18,Sample X28,700,159,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8
10528,O15,Sample X29,700,64,800,485_520_2,,,,,Blank,1,,30,Alex Perkins,BMG POLARstar Omega,8


In [13]:
#print(tidy_data)
tidy_data['Assay Date'] = raw_package_dict['metadata'].iloc[1,1][6:]
tidy_data['Assay Date'] = pd.to_datetime(tidy_data['Assay Date'])
tidy_data

  cache_array = _maybe_cache(arg, format, cache, convert_listlike)


Unnamed: 0,Well,Sample ID,Time (Minutes),RFUs,Gain Setting,Chromatic Settings,Amplicon DNA Template,Energy Solution,K_Glutamate_mM,Mg_Acetate_mM,Reaction Type,Replicate,System,Reaction Temperature (°C),Performed by,Instrument,Experiment #,Assay Date
0,C03,Sample X1,0,3181,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
1,C06,Sample X2,0,3042,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,8,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
2,F03,Sample X3,0,3076,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
3,F06,Sample X4,0,3066,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,9,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
4,I03,Sample X5,0,3137,1500,485_520_3,T7_GFP_MGA,Grassman ES,20,10,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10525,I18,Sample X26,700,192,800,485_520_2,T7_GFP_MGA,Grassman ES,40,10,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
10526,L15,Sample X27,700,152,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,1,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
10527,L18,Sample X28,700,159,800,485_520_2,T7_GFP_MGA,Grassman ES,40,11,TXTL,2,OnePotPure B2,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21
10528,O15,Sample X29,700,64,800,485_520_2,,,,,Blank,1,,30,Alex Perkins,BMG POLARstar Omega,8,2022-05-21


# Save to CSV

In [27]:

print(os.getcwd())
path = "/src/tidy_data_files/"

# make directory for sticking the output in
if os.path.isdir(path) == False:
    os.mkdir(path, mode=0o777)
    
    
#navigate to tidy_data_files
os.chdir(path)

tidy_data.to_csv(dataset_name+"tidy_data.csv", header=True, index=False)


#navigate home for neatness
os.chdir('/src')

/src
