In [12]:
import numpy as np
import pandas as pd
import plotly.express as px
from tools import import_data

# Import the data from the csv files
df_calibration = import_data("calibration.csv")
df_sample = import_data("sample.csv")

# Calculate the known concentration of the calibration samples in mg/L
df_calibration["Concentration mg/L"] = 50 / df_calibration["Dilution"]

df_calibration.head(), df_calibration.tail()

(     Sample Well  Wavelength  Dilution  Raw Absorbance  Concentration mg/L
 6984  Blank   E1       220.0       1.0           3.977                50.0
 7275  Blank   E3       220.0       1.0           3.393                50.0
 7566  Blank   E5       220.0       1.0           3.439                50.0
 6985  Blank   E1       222.0       1.0           3.618                50.0
 7276  Blank   E3       222.0       1.0           3.726                50.0,
      Sample Well  Wavelength  Dilution  Raw Absorbance  Concentration mg/L
 4654     S1   C7       798.0     128.0           0.051            0.390625
 6982     S1  D11       798.0     128.0           0.051            0.390625
 2327     S1   B3       800.0     128.0           0.050            0.390625
 4655     S1   C7       800.0     128.0           0.049            0.390625
 6983     S1  D11       800.0     128.0           0.051            0.390625)

# Part 1

The following Beer-Lambert equation can be used to determine the relationship between absorbance ($A$) and concentration ($c$):

$$
A = \Epsilon\ c\ l
$$

where $\Epsilon$ is the absorptivity coefficient and $l$ is optical path length in cm. Both $A$ and $E$ are functions of wavelengths. For the experiments we conduct $l$ is equal to 1 cm. 

We assume that when measuring multiple chemicals their absorbances can be added such that for $N$ chemicals, $A = \sum_{i=1}^{N}A_i$. When measuring a sample in the spectrophotometer (chemical in a solvent), we also measure the solvent only and call this a Blank.

The measured absorbance (labelled $A_{obs}$) is the combined absorbence of all of the chemicals in the solvent (labelled $A_{Blank}$) and the the absorbence of the pigments of interest (labelled $A_{S1}$). Under the assumption above, that the absorbances simply add combination, $A_{obs} = A_{S1} + A_{Blank}$, the blank-corrected absorbance is

$$
A_{S1} = A_{Obs} - A_{Blank}
$$

To apply this correction function, I will average $A_{Blank}$ at each measured wavelength, and subtract it from $A_{Obs}$.



In [13]:
def calculate_corrected_absorbance(df):
    # Takes a pandas dataframe and calculates the corrected absorbance
    # for each wavelength, by subtracting the mean absorbance of the blank from 
    # the sample absorbance.
    
    # Split the dataframes into signal and blank
    df_S1 = df[df["Sample"] != "Blank"]
    df_blank = df[df["Sample"] == "Blank"]

    # Calculate the mean absorbance for each wavelength in the Blank samples
    mean_abs_blank = df_blank.groupby("Wavelength").mean("Raw Absorbance")["Raw Absorbance"]
    
    # Calculate the corrected absorbance for each wavelength in the signal samples
    df_S1["Absorbance"] = df_S1["Raw Absorbance"] - mean_abs_blank[df_S1["Wavelength"]].values
    
    return df_S1, mean_abs_blank

df_S1, mean_abs_blank = calculate_corrected_absorbance(df_calibration)
df_S1.head()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Sample,Well,Wavelength,Dilution,Raw Absorbance,Concentration mg/L,Absorbance
0,S1,A1,220.0,1.0,3.736,50.0,0.133
2328,S1,B5,220.0,1.0,2.956,50.0,-0.647
4656,S1,C9,220.0,1.0,3.234,50.0,-0.369
1,S1,A1,222.0,1.0,3.389,50.0,-0.166333
2329,S1,B5,222.0,1.0,3.329,50.0,-0.226333


In [14]:
fig = px.scatter( df_S1, 
                 x = "Wavelength", 
                 y = "Raw Absorbance", 
                 color = "Concentration mg/L", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Raw absorbance curve for S1")
fig.show()

fig = px.line( mean_abs_blank, 
                 title = "Blank absorbance curve")
fig.show()

fig = px.scatter( df_S1, 
                 x = "Wavelength", 
                 y = "Absorbance", 
                 color = "Concentration mg/L", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Corrected absorbance curve for S1")
fig.show()




In [15]:
# From the plots above we can see
# 1) There are anomalous readings at the start of the wavelength range < 340 nm, likely due to the limit of detection of the machine.
# 2) The absorbance generally increases with concentration as expected. 

# First I will limit the dataset to around the peak
wav_low = 450
wav_high = 600

df_S1 = df_S1[(df_S1["Wavelength"] > wav_low ) & (df_S1["Wavelength"] < wav_high)] 

# and then plot
fig = px.scatter( df_S1, 
                 x = "Wavelength", 
                 y = "Absorbance", 
                 color = "Concentration mg/L", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Corrected absorbance curve for S1")
fig.show()

According to the Beer-Lambert equation, the absorbance ($A_{S1}$) is a linear function of the concentration($c$). Therefore, therefore the ratio of absorbance to concentration should collapse down to a function that is proportional to $\Epsilon$ is the absorptivity coefficien. 

$$
\Epsilon = \frac{ A }{c\ l}
$$


In [16]:
df_S1["Absorptivity"] = df_S1["Absorbance"] / df_S1["Concentration mg/L"] / 0.01

fig = px.scatter( df_S1, 
                 x = "Wavelength", 
                 y = "Absorptivity", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                     "Absorptivity": "Absorptivity",
                 },
                 title = "Corrected Absorptivity curve for S1")
fig.show()

# Part 2: Predicting the sample concentration
 Now I move on to predicting the sample concentration. 

In [17]:
df_sample.head(), df_sample.tail()

(     Sample Well  Wavelength  Dilution  Raw Absorbance
 873   Blank   B1       220.0       1.0           3.977
 1164  Blank   B2       220.0       1.0           3.393
 1455  Blank   B3       220.0       1.0           3.439
 874   Blank   B1       222.0       1.0           3.618
 1165  Blank   B2       222.0       1.0           3.726,
     Sample Well  Wavelength  Dilution  Raw Absorbance
 580     X1   A2       798.0       1.0           0.022
 871     X1   A3       798.0       1.0           0.015
 290     X1   A1       800.0       1.0           0.014
 581     X1   A2       800.0       1.0           0.022
 872     X1   A3       800.0       1.0           0.015)

In [18]:
fig = px.scatter( df_sample, 
                 x = "Wavelength", 
                 y = "Raw Absorbance", 
                 color = "Sample", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Raw absorbance curve for sample")
fig.show()

In [19]:
df_X1, mean_abs_blank_X1 = calculate_corrected_absorbance(df_sample)

fig = px.scatter( df_X1, 
                 x = "Wavelength", 
                 y = "Raw Absorbance", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Raw absorbance curve for X1")
fig.show()

fig = px.line( mean_abs_blank_X1, 
                 title = "Blank absorbance curve")
fig.show()

fig = px.scatter( df_X1, 
                 x = "Wavelength", 
                 y = "Absorbance", 
                 labels={
                     "Wavelength": "Wavelength (nm)",
                 },
                 title = "Corrected absorbance curve for X1")
fig.show()






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [20]:
# I compare the background signals of the two datasets to see if they are similar. 
# If they were to differ, then it might suggest that the machines have different gain settings.

background_ratio = mean_abs_blank_X1/mean_abs_blank
print(f"{np.mean(background_ratio)} +/- {np.std(background_ratio)}")

# The background signals are similar, so I can assume that the same gain was used in both settings.

1.0 +/- 0.0


## Simple method for estimating the concentration 

First, I assume that, given the chemicals in question are the same in the sample and calibration datasets, they should have the same Absorptivity. Therefore, I can invert the Beer-Lambert equation to estimate the concentration. 


$$
c = \frac{ A_{sample} }{\Epsilon_{calibration}  \ l}
$$

As the B-L eqn is linear, this is a linear interpolation method.

In [23]:
# Simple method for concentration estimation
# First I limit the dataset to around the peak
df_X1 = df_X1[(df_X1["Wavelength"] > wav_low ) & (df_X1["Wavelength"] < wav_high)]

# I calculate the mean and standard deviation of the absorptivity for each wavelength in the calibration dataset.
absorptivity_mean = df_S1.groupby("Wavelength").apply(lambda x: np.mean(x["Absorptivity"]))
absorptivity_std = df_S1.groupby("Wavelength").apply(lambda x: np.std(x["Absorptivity"]))
                                       
# I use a linear interpolation to esimate the concentration of the sample from the sample absorptivity.                                       
df_X1["Concentration estimate"] = df_X1["Absorbance"] / absorptivity_mean[df_X1["Wavelength"]].values / 0.01

fig = px.scatter( df_X1,
                x = "Wavelength", 
                y = "Concentration estimate",
                labels={ "Wavelength": "Wavelength (nm)",
                        "Concentration estimate": "Concentration estimate (mg/L)"},
                title = "Concentration estimate for X1")
fig.show()