# RAU_Calc Code
#### Author: Rachel Veenstra
#### Updated: 04-15-2019

In [1]:
# Importing necessary modules

import os
import math
import pandas as pd
import numpy as np
import glob
from scipy import stats
import shutil



# Setting working directory and navigating

# Capturing the project directory only the first time this cell runs in a kernel.
# The working directory is changed below, so calling getcwd() again on a re-run
# would return a different folder and build a wrong dataset path.

if 'directory' not in globals():
    directory = os.getcwd()

loc = os.path.join(directory, 'Sample_Datasets')

# loc is built from the stored project directory, so repeating this call
# on a re-run lands in the same folder.

os.chdir(loc)


#### This cell can be re-run within the same kernel session without changing 'directory' ####



In [3]:
# Mass-importing all files from specified datasets folder

# glob returns matches in arbitrary (filesystem-dependent) order, so the paths
# are sorted to make the processing order - and therefore the row order of the
# results - the same on every machine.

all_data = sorted(glob.glob(loc + "/*.csv"))

In [37]:
# Creating lists to be appended with values from each dataset file

n_final_id = []
n_final_conc = []
u_final_id = []
u_final_conc = []



# Defining one shared routine for both analyses, since the nitrate and ureide
# calculations differ only by a final multiplier

def _sample_concentrations(path, multiplier=1):
    """Return the sample IDs and calculated concentrations for one dataset file.

    The file is read with pandas and must contain the columns 'Sample_ID',
    'Absorbance', 'Type' and 'Sample_Wt(g)'. Rows are used according to 'Type':

    * 'C' rows form the calibration curve: the Sample_ID (converted to float)
      is regressed on the mean absorbance of all readings sharing that ID.
    * 'B' rows are blanks: the mean of their individual absorbance readings is
      subtracted from each sample's mean absorbance.
    * 'O' rows are the samples that get reported.

    Rows with any other Type contribute to the per-ID mean absorbance but are
    not reported themselves.

    Parameters
    ----------
    path : str
        Path of the .csv file to read.
    multiplier : number, optional
        Factor applied to every calculated concentration (default 1).

    Returns
    -------
    tuple of (list, list)
        Sample IDs of the 'O' rows and their concentrations, in file order.

    Raises
    ------
    ValueError
        If the file contains no calibration curve ('C') rows.
    KeyError
        If one of the required columns is missing.
    """

    raw = pd.read_csv(path)

    readings = raw[['Sample_ID', 'Absorbance', 'Type', 'Sample_Wt(g)']].copy()


    # Each sample has duplicate readings, so the mean absorbance is taken per
    # Sample_ID. transform() returns the group mean on the original row index,
    # so 'Type' and 'Sample_Wt(g)' stay attached to the right reading.

    readings['Mean'] = readings.groupby('Sample_ID')['Absorbance'].transform('mean')


    # Calibration curve: known concentration (stored in Sample_ID) as a
    # function of mean absorbance

    curve = readings[readings['Type'] == 'C']

    if curve.empty:
        raise ValueError('No calibration curve rows (Type == "C") found in ' + str(path))

    fit = stats.linregress(curve['Mean'].astype(float).values,
                           curve['Sample_ID'].astype(float).values)


    # Taking the mean of the individual blank readings for use as base in the
    # final calculations (NaN when the file has no blank rows)

    blank_val = np.mean(readings.loc[readings['Type'] == 'B', 'Absorbance'].astype(float).values)


    # Only the original sample rows are reported; because means are used, the
    # duplicate rows of a sample do not need to be on the final list

    samples = readings[readings['Type'] == 'O']

    curve_value = (samples['Mean'] - blank_val) * fit.slope + fit.intercept

    # NOTE(review): 1000 and 7.5 are carried over unchanged from the original
    # formula; presumably a g -> mg conversion and an extraction volume - confirm.

    concentration = curve_value / ((samples['Sample_Wt(g)'] * 1000) / 7.5) * multiplier

    return samples['Sample_ID'].tolist(), concentration.tolist()



# Looping through all files in dataset folder

for data_path in all_data:


    # Defining analysis type based on file name. Only the file name is split,
    # not the whole path, so underscores in folder names (such as
    # 'Sample_Datasets') cannot be mistaken for part of the name.

    name_parts = os.path.basename(data_path).split('_')

    if len(name_parts) < 2:

        # No analysis tag in the file name, so the file is skipped
        continue

    analysis = name_parts[-2]


    # Running only Nitrates files

    if analysis == 'NIT':

        ids, concentrations = _sample_concentrations(data_path)

        n_final_id.extend(ids)
        n_final_conc.extend(concentrations)


    # Running only Ureides files

    elif analysis == 'URE':

        # NOTE(review): the factor 4 is carried over from the original ureides
        # formula; presumably a dilution factor - confirm.

        ids, concentrations = _sample_concentrations(data_path, multiplier=4)

        u_final_id.extend(ids)
        u_final_conc.extend(concentrations)
    

In [43]:
# Building one dataframe per analysis directly from the lists created above

columns = ['Sample_ID', 'Concentration']

nitrates = pd.DataFrame({'Sample_ID': n_final_id, 'Concentration': n_final_conc}, columns=columns)

ureides = pd.DataFrame({'Sample_ID': u_final_id, 'Concentration': u_final_conc}, columns=columns)



# Merging the two dataframes on 'Sample_ID', keeping every nitrates row.
# The suffixes label the two concentration columns to keep N and U straight.

merged_df = nitrates.merge(ureides, how='left', on='Sample_ID', suffixes=('_N', '_U'))

In [44]:
# Calculating RAU for every row of the merged dataframe as
# Concentration_U / (Concentration_N + Concentration_U)

# The division is done on whole float columns rather than row by row, so a
# sample whose total is exactly zero yields NaN/inf in its own row instead of
# stopping the calculation for every sample.

ureide_conc = merged_df['Concentration_U'].astype(float)
total_conc = merged_df['Concentration_N'].astype(float) + ureide_conc

RAU = (ureide_conc / total_conc).tolist()



# Inserting data from list into new column of merged data frame

merged_df['RAU'] = RAU

In [47]:
# Assigning a location for saving the output file

save_loc = os.path.join(directory, 'RAU_Results')


# Creating the results folder if it does not exist yet, so saving cannot fail
# on a missing directory

os.makedirs(save_loc, exist_ok=True)



# Saving the dataframe as a .csv file in the specified saving location, ignoring index to preserve simple look.
# The file is written through its full path, so the working directory is left untouched.

merged_df.to_csv(os.path.join(save_loc, 'Final_RAU_Results.csv'), index=False)



##### THE COMPLETED FILE WITH FINAL RAU CALCULATIONS CAN NOW BE FOUND IN THE 'RAU_Results' FOLDER!! #####