In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#Read the comma-separated instrument export into a DataFrame, skipping the first two lines of the file
#(comma is already the default separator for read_csv, so it is not passed explicitly)
df = pd.read_csv('rawDataMS.csv', skiprows=2)

#This function counts how many different samples were run.
#It counts the number of "Average counts from 3 runs" entries in the "Run" column.
def countSamples(df, label='Average counts from 3 runs'):
    """Count the rows whose "Run" cell equals ``label``.

    In the table loaded by this notebook every sample block carries one
    "Average counts from 3 runs" row, so with the default label the result
    is the number of samples in the table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a "Run" column.
    label : str, optional
        Exact cell text to count. Defaults to the summary-row marker
        'Average counts from 3 runs'.

    Returns
    -------
    int
        Number of matching rows (0 when there are none).

    Raises
    ------
    KeyError
        If ``df`` has no "Run" column.
    """
    # Vectorised comparison instead of a Python loop; NaN cells compare
    # unequal to the label and are therefore not counted.
    return int(df['Run'].eq(label).sum())

# Print how many "Average counts from 3 runs" rows (i.e. samples) the table contains
print(countSamples(df))


14


In [9]:
# Display the parsed table for inspection
df

Unnamed: 0,Run,Time,50Cr,52Cr,53Cr,54Cr,58Ni,60Ni,61Ni,62Ni,64Ni
0,,,2vv Nitric 4/24/2023 5:22:13 PM,,,,,,,,
1,1,17:22:26,592.019,7629.2,133.001,198883.98,21801.11,8242.735,370.008,1167.075,802.035
2,2,17:22:38,567.018,7510.101,134.001,193780.52,21573.568,8114.62,396.009,1169.075,809.036
3,3,17:22:50,609.02,7379.994,128.001,188662.54,21004.237,7846.385,323.006,1119.069,813.036
4,Average counts from 3 runs,,589,7506,132,193776,21460,8068,363,1152,808
...,...,...,...,...,...,...,...,...,...,...,...
106,2,18:00:24,35711889,790956570,95979315,25824112,236770.68,89672.089,3975.869,13118.458,265323.11
107,3,18:00:36,35501347,791389200,96115818,25871697,237490.09,89707.435,4059.906,12998.286,264913.42
108,Average counts from 3 runs,,35590580,790937440,95948065,25814822,235151.89,88734.724,3976.536,12879.454,264834.51
109,standard deviation,,108874.99,461618.35,185363.92,62044.004,3445.682,1654.269,83.038,315.667,532.454


In [27]:
#Create a dictionary where the keys are the sample names and the values are the DataFrame slices belonging to each sample
#Loop through the rows; whenever the "50Cr" column contains a string with "PM" or "AM" in it, use that string as the unique sample name

def createDict(df, rowsPerSample=6):
    """Split the table into one DataFrame slice per sample.

    A row is treated as a sample header when its "50Cr" cell is a string
    containing 'PM' or 'AM' (the timestamp suffix of the header text). That
    header string becomes the dictionary key, and the value is the slice of
    ``rowsPerSample`` rows starting at the header row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a "50Cr" column. Any index is accepted; rows are
        located by position, not by index label.
    rowsPerSample : int, optional
        Number of rows (header row included) to keep for each sample.
        With the layout shown in this notebook the default of 6 covers the
        header, the three runs, the average and the standard deviation;
        pass 7 to also include the "%RSD" row.

    Returns
    -------
    dict
        Maps header text to a DataFrame slice. A slice near the end of the
        table may be shorter than ``rowsPerSample``. If two headers have
        identical text, the later one replaces the earlier one.

    Raises
    ------
    KeyError
        If ``df`` has no "50Cr" column.
    """
    sampleDict = {}
    # enumerate() yields positions, which keeps the header lookup consistent
    # with the positional .iloc slice below even when the index is not 0..n-1
    # (e.g. after rows have been dropped).
    for position, cell in enumerate(df['50Cr']):
        if isinstance(cell, str) and ('PM' in cell or 'AM' in cell):
            sampleDict[cell] = df.iloc[position:position + rowsPerSample]
    return sampleDict

# Split the full table into one DataFrame slice per sample, keyed by the sample header text
sampleDict = createDict(df)

# Spot-check one sample. The key must match the header text exactly, including the run of spaces before the date.
sampleDict.get('.1 ppm NiCr    4/24/2023 5:24:50 PM')







Unnamed: 0,Run,Time,50Cr,52Cr,53Cr,54Cr,58Ni,60Ni,61Ni,62Ni,64Ni
8,,,.1 ppm NiCr 4/24/2023 5:24:50 PM,,,,,,,,
9,1,17:25:02,267438.77,6997041.5,676365.45,521919.96,4243503.1,1353106.3,60963.729,198788.93,1399062.6
10,2,17:25:15,281226.61,7374526.3,713934.44,542301.53,4358725.7,1418253.2,63298.605,208571.48,1472068.3
11,3,17:25:26,284769.37,7426923.4,875399.06,544842.94,4369767.2,1429358.7,63852.458,211618.7,1484774.4
12,Average counts from 3 runs,,277812,7266164.0,755233.0,536355.0,4323999.0,1400239.0,62705.0,206326.0,1451969.0
13,standard deviation,,9156,234535.0,105749.0,12565.0,69929.0,41194.0,1533.0,6703.0,46256.0


In [30]:
#Keep only the rows that have a value in the "Run" column.
#Rows whose "Run" is NaN (such as the sample-name header rows) are left out; the original index labels are kept.
df_removed_name = df.loc[df['Run'].notna()]
df_removed_name.head(10)


Unnamed: 0,Run,Time,50Cr,52Cr,53Cr,54Cr,58Ni,60Ni,61Ni,62Ni,64Ni
1,1,17:22:26,592.019,7629.2,133.001,198883.98,21801.11,8242.735,370.008,1167.075,802.035
2,2,17:22:38,567.018,7510.101,134.001,193780.52,21573.568,8114.62,396.009,1169.075,809.036
3,3,17:22:50,609.02,7379.994,128.001,188662.54,21004.237,7846.385,323.006,1119.069,813.036
4,Average counts from 3 runs,,589.0,7506.0,132.0,193776.0,21460.0,8068.0,363.0,1152.0,808.0
5,standard deviation,,21.0,125.0,3.0,5111.0,410.0,202.0,37.0,28.0,6.0
6,%RSD,,4.0,2.0,2.0,3.0,2.0,3.0,10.0,2.0,1.0
9,1,17:25:02,267438.77,6997041.5,676365.45,521919.96,4243503.1,1353106.3,60963.729,198788.93,1399062.6
10,2,17:25:15,281226.61,7374526.3,713934.44,542301.53,4358725.7,1418253.2,63298.605,208571.48,1472068.3
11,3,17:25:26,284769.37,7426923.4,875399.06,544842.94,4369767.2,1429358.7,63852.458,211618.7,1484774.4
12,Average counts from 3 runs,,277812.0,7266164.0,755233.0,536355.0,4323999.0,1400239.0,62705.0,206326.0,1451969.0


In [34]:
#Transform the dictionary into one big dataframe, with the key as a column (Sample Name) and all other information as rows
def transformDict(sampleDict):
    """Build the skeleton of the per-sample summary table.

    The returned DataFrame has one row per key of ``sampleDict`` in a
    "Sample Name" column, plus one blank (empty-string) column for every
    column that follows the first two columns of the per-sample frames
    (in this notebook: everything after "Run" and "Time").

    TODO: the measurement columns are placeholders only. The planned next
    step is to fill them, for each sample, from its "Average counts from
    3 runs", "standard deviation" and "%RSD" rows (in df_removed_name each
    sample occupies 6 consecutive rows and the last 3 hold those values).
    """
    summary = pd.DataFrame()
    summary['Sample Name'] = sampleDict.keys()

    # Gather the measurement column names once, in first-seen order, instead
    # of re-assigning the same columns for every sample.
    measurementColumns = dict.fromkeys(
        column
        for sampleFrame in sampleDict.values()
        for column in sampleFrame.columns[2:]
    )
    for column in measurementColumns:
        summary[column] = ''

    return summary

# Preview the summary table skeleton built from the per-sample dictionary
transformDict(sampleDict)


Unnamed: 0,Sample Name,50Cr,52Cr,53Cr,54Cr,58Ni,60Ni,61Ni,62Ni,64Ni
0,2vv Nitric 4/24/2023 5:22:13 PM,,,,,,,,,
1,.1 ppm NiCr 4/24/2023 5:24:50 PM,,,,,,,,,
2,1 ppm NiCr 4/24/2023 5:27:27 PM,,,,,,,,,
3,5 ppm NiCr 4/24/2023 5:30:05 PM,,,,,,,,,
4,20 ppm NiCr 4/24/2023 5:32:43 PM,,,,,,,,,
5,10B1[01]01A 4/24/2023 5:35:25 PM,,,,,,,,,
6,10D1[01]04A 4/24/2023 5:38:05 PM,,,,,,,,,
7,10D1[01]05A 4/24/2023 5:40:45 PM,,,,,,,,,
8,Undig CrF3 4/24/2023 5:43:26 PM,,,,,,,,,
9,10D [01] 303 4/24/2023 5:46:07 PM,,,,,,,,,
