# Creating Datasheets

#### Purpose: 
> ##### Importing files, extracting data, and creating a set of datasheets for lab use.

#### Inputs: 
> ##### .csv file (NAMED IN FORMAT SPECIFIED) with Sample ID information in "Student_Lists" folder
- Naming format: (Initials)_(StudyAbbreviation).csv
- Example: RV_PYN.csv

> ##### User input (when prompted) of how many samples will be run per set

#### Output:
> ##### Set of datasheets in Lab_Sheets folder that are:
- Composed of four columns

>>1) Type : Contains class information given as B (blank), C (curve), O (original sample), or D (duplicated sample)

>>2) Sample_Wt(g) : Contains blank cells for lab to fill (unit = grams)

>>3) Sample_ID : Contains identification information for blanks, curve values, and *DUPLICATED* Sample ID values from .csv file provided

>>4) Absorbance : Contains blank cells for lab to fill

>- Separated based on identified set size
>- Duplicated for ureide and nitrate analysis
>> - Standard curve (C) values unique to analysis type
- Uniquely named based on name of original .csv file, set number, and analysis type

#### Author: Rachel Veenstra
#### Date Created: 03-29-2019


In [5]:
# Importing necessary modules

import glob
import os
from itertools import cycle

import numpy as np
import pandas as pd



# Identifying path for mass-data retrieval
#
# The user name is read from the third backslash-separated component of the
# current working directory (presumably a Windows profile path of the form
# <drive>, Users, <user name>, ... -- confirm for other setups).

path = os.getcwd()

path_parts = path.split('\\')

if len(path_parts) < 3:
    raise RuntimeError(
        "Cannot determine the user name from the working directory: " + path
    )

user = path_parts[2]

folder = "Student_Lists/"

data_files = '/Users/' + user + '/Desktop/Coding/StandardLab/' + folder

os.chdir(data_files)

# "folder" already ends with a separator, so the pattern is joined with
# os.path.join rather than by adding another "/". Sorting makes the order in
# which the user is prompted about each file repeatable between runs.

all_data_files = sorted(glob.glob(os.path.join(data_files, "*.csv")))



# Looping through each csv file in given location and writing one nitrate and
# one ureide datasheet per set of samples.


# Sample_ID entries placed above the samples on every sheet: four blanks
# followed by twelve standard-curve entries (four values, each in triplicate).
# The curve values are unique to the analysis type.

BLANK_IDS = ['1_1', '1_2', '2_1', '2_2']
NITRATE_CURVE = [0.00] * 3 + [1500.00] * 3 + [3000.00] * 3 + [4500.00] * 3
UREIDE_CURVE = [0.00] * 3 + [549.5825955] * 3 + [1099.165191] * 3 + [4396.660764] * 3


def _build_sheet(curve_values, samples):
    """Return one lab datasheet for a single set of samples.

    curve_values : list of standard-curve Sample_ID entries for the analysis
    samples      : slice of the doubled sample table, in which every original
                   row is immediately followed by its duplicate

    The returned dataframe has the columns Type, Sample_Wt(g), Sample_ID and
    Absorbance. Sample_Wt(g) and Absorbance hold empty strings for the lab to
    fill in later.
    """
    top_ids = BLANK_IDS + list(curve_values)
    sheet = pd.concat([pd.DataFrame(top_ids), samples], ignore_index=True)

    # The Type column is assigned in one piece. Overwriting part of it through
    # "sheet.Type[0:16] = ..." is chained assignment, which pandas warns about
    # and which has no effect once copy-on-write is enabled.
    top_types = ['B'] * len(BLANK_IDS) + ['C'] * len(curve_values)
    sample_types = [kind for _, kind in zip(range(len(samples)), cycle(['O', 'D']))]
    sheet['Type'] = top_types + sample_types

    sheet['Sample_Wt(g)'] = ''
    sheet['Absorbance'] = ''

    # Column 0 holds the Sample_ID values read from the csv file
    sheet = sheet[['Type', 'Sample_Wt(g)', 0, 'Absorbance']]
    sheet.columns = ['Type', 'Sample_Wt(g)', 'Sample_ID', 'Absorbance']

    return sheet


for file in all_data_files:
    id_data = pd.read_csv(file, header=None)

    # Separating file information to extract title for final output names
    # (basename copes with either path separator)

    file_name = os.path.basename(file)
    title = file_name.split('.')[0]

    # Doubling each row to account for original and duplicate samples

    template = id_data.loc[np.repeat(id_data.index.values, 2)]

    # Prompting user to identify desired set size

    samp_set = int(input('You have ' + str(len(id_data)) + ' unique samples (not including duplicates) to run in ' + file_name + '. How many of these do you wish to run per set?'))

    if samp_set < 1:
        raise ValueError("Set size must be a whole number of at least 1, got " + str(samp_set))

    # Rounding up so that samples left over after the last full set still get
    # a (shorter) sheet instead of being dropped, and so that a set size larger
    # than the number of samples produces one sheet instead of none.

    n_sets = (len(id_data) + samp_set - 1) // samp_set

    print("Creating " + str(n_sets) + " sheets for " + str(len(template)) + " total samples.")

    # Reassigning navigation variables to save output files in a new location

    folder = "Lab_Sheets"

    save_loc = '/Users/' + user + '/Desktop/Coding/StandardLab/' + folder

    # Navigating to specified saving location (input files are still reachable
    # because the paths in all_data_files include their directory)

    os.chdir(save_loc)

    # Creating sheets based on number of samples to be displayed on each sheet
    # (set size); every sample occupies two rows of the doubled table

    rows_per_set = samp_set * 2

    for i in range(n_sets):

        template2 = template[i * rows_per_set:(i + 1) * rows_per_set]

        # Building separate sheets for nitrates and ureides from the same samples

        nit_id_data = _build_sheet(NITRATE_CURVE, template2)
        ure_id_data = _build_sheet(UREIDE_CURVE, template2)

        # Saving unique nitrate and ureide datasets in new location with unique names
        # based on set# ... date is blank for user to change later when samples are run

        nit_id_data.to_csv('A_' + str(i+1) + "_NIT_" + str(title) + "_00_00_00.csv", index=False)
        ure_id_data.to_csv('A_' + str(i+1) + "_URE_" + str(title) + "_00_00_00.csv", index=False)

You have 200 unique samples (not including duplicates) to run in MR_USB.csv. How many of these do you wish to run per set? 20


Creating 10 sheets for 400 total samples.


You have 300 unique samples (not including duplicates) to run in RV_CTS.csv. How many of these do you wish to run per set? 20


Creating 15 sheets for 600 total samples.
