In [None]:
# Import functions and settings used in this notebook
%run ./modules/data_reduction_modules.ipynb

# pCO₂ Data Reduction part 4 - UW pCO₂ cruise

We will use the same data set as in part 1 but with all recorded variables. We will perform all necessary steps as also described in Denis' talk.

## Data set
Here you find an example dataset, which was prepared for this exercise. You can download it to your computer and have a look at it: https://fileshare.icos-cp.eu/s/fxXNYSiPjoGJeZ9
This is not needed for further calculations but you might want to know how your dataset looks. The columns are described below in the meta data section. The dataset starts with a zero and span measurement to adjust the Licor. Normally one uses CO₂ free air as zero gas and the highest reference gas as span gas. This is followed by a calibration using three non-zero reference gases.


### Here are the metadata:
**xCO₂ sensor:** Licor 6262  
**xCO₂ concentration of reference gases:**  

| Standard | Concentration |
| :- | -: |
| STD1 | 0 ppm |
| STD2 | 228.4 ppm | 
| STD3 | 412.5 ppm |
| STD4 | 507.77 ppm |

**column header:**

| Column | Notes | GO specific |
| :- | :- | :-|
| Timestamp | | |
| Type | Type of measurement (`STDx`: standard gas x, `EQU`: equilibrator, `ATM`: atmosphere)** | |
| error | GO instrument specific error code - will not be used here | * |
| PC date and time | date and time from system clock | |
| GPS date and time | date and time from GPS signal | |
| latitude | in °N |   |
| longitude | in °E |   |
| equ temp | temperature measurement inside equilibrator in °C | |
| std val | referenced value of CO₂ standards as entered by the user |  |
| CO2 mv | raw data readings for CO₂ channel from Licor |  |
| CO2 um/m | xCO₂ measurement from LI6262 in µmol/mol or ppm |  |
| H2O mv | raw data readings for H2O channel from Licor |  |
| H2O mm/m | xH₂O measurement from LI6262 in mmol/mol|  |
| licor temp | temperature inside Licor cell in °C |  |
| licor press | pressure inside Licor cell in mbar |  |
| atm press | atmospheric pressure measured 40 m above sea level (ship specific) |  |
| equ press | pressure difference between inside and outside the equilibrator |  * can also be absolute |
| H2O flow | water flow through the GO system in L/min | |
| licor flow | gas flow through the Licor cell in mL/min | |
| equ pump | equilibrator pump speed | * |
| vent flow | gas flow in mL/min at equilibrator vent | * |
| atm cond | GO housekeeping variable | * |
| equ cond | GO housekeeping variable | * |
| drip1 | GO housekeeping variable |  |
| drip2 | GO housekeeping variable | * |
| cond temp | temperature inside condenser in °C | * |
| dry box temp | housing temperature |  |
| deck box temp | GO housekeeping variable | * |
| WB pressure | pressure next to equilibrator in mbar | |
| Intake Temp | intake temperature in °C, here from a SBE38 | |
| Salinity | salinity in PSU, here from a SBE21 | |


** Note the type of the first two measurements. It just tells you that the system starts running and that the water filter is flushed (both GO specific). 

### Next cell (1)
Load the data file and display. Here we will use the GPS date/time as our time stamp.


In [None]:
# Load the tab-separated raw cruise file into a DataFrame
input_data = pandas.read_csv("data/part4_input_raw_cruise_dat.txt", sep="\t")

# Fix data types: convert the whole column in one call instead of value by value
input_data['std val'] = pandas.to_numeric(input_data['std val'])

# We also create a real timestamp field from the GPS date and time.
# The two text columns are joined with a space and parsed in a single call,
# which avoids running the date parser once per row.
input_data['Timestamp'] = pandas.to_datetime(
    input_data['GPS date'] + ' ' + input_data['gps time'],
    format="%d.%m.%Y %H:%M:%S")

# display the first 100 lines
input_data.head(100)

### Cell (2) - xCO2 calibration
This is the calibration of our xCO₂ measurements from part 1. It's basically the same as in cell (10) of "Gas Calibration.ipynb". The resulting corrected xCO₂ is saved in xco2_corr.

In [None]:
# True xCO2 concentrations of the four gas standards STD1..STD4
# (values as listed in the metadata table)
ref1, ref2, ref3, ref4 = 0, 228.4, 412.5, 507.77

# Column that will receive the calibrated CO2; it starts out as NaN everywhere
input_data['xco2_corr'] = np.nan

# Function to predict the measurement of one of the
# standards at a specified time
def measured_co2_at_row(standard, time):
    """Estimate the analyser reading of a gas standard at a given time.

    The reading is taken from the straight line through the two runs of
    `standard` that bracket `time`: the last one strictly before it and the
    first one strictly after it.

    Parameters
    ----------
    standard : str
        Value of the 'Type' column identifying the standard (e.g. 'STD2').
    time : timestamp object providing ``.timestamp()``
        Time at which the reading is wanted.

    Returns
    -------
    float
        Value of the fitted line ('CO2 um/m' against time in seconds) at `time`.

    Raises
    ------
    IndexError
        If `input_data` holds no run of `standard` before `time`, or none after it.
    """
    # All runs of the requested standard, in the order they appear in input_data.
    # NOTE(review): picking the last/first row assumes input_data is sorted by time.
    standard_runs = input_data[input_data['Type'] == standard]

    # Bracketing runs: last one earlier than `time`, first one later than `time`
    before = standard_runs[standard_runs['Timestamp'] < time].iloc[-1]
    after = standard_runs[standard_runs['Timestamp'] > time].iloc[0]

    # Regress the readings against time expressed in seconds
    seconds = [before['Timestamp'].timestamp(), after['Timestamp'].timestamp()]
    readings = [before['CO2 um/m'], after['CO2 um/m']]
    slope, intercept, *_ = stats.linregress(seconds, readings)

    # Evaluate the fitted line at the requested time
    return time.timestamp() * slope + intercept

# Function to calibrate the CO2 value for a given row
# in the input data
def calibrate(index, row):
    """Return the calibrated xCO2 for one measurement row.

    The analyser readings of STD2, STD3 and STD4 are estimated at the row's
    timestamp with measured_co2_at_row(). The true standard concentrations
    (y) are then regressed against those readings (x), which gives the
    calibration line ``true = slope * measured + intercept``. That line is
    applied directly to the row's own 'CO2 um/m' reading.

    Parameters
    ----------
    index
        Index label of the row; not used in the calculation, kept so existing
        callers keep working.
    row
        Row of input_data providing 'Timestamp' and 'CO2 um/m'.

    Returns
    -------
    float
        The calibrated CO2 value for the row.
    """
    # Calculate the measured standard values at the specified time
    std2_at_time = measured_co2_at_row('STD2', row['Timestamp'])
    std3_at_time = measured_co2_at_row('STD3', row['Timestamp'])
    std4_at_time = measured_co2_at_row('STD4', row['Timestamp'])

    # Perform the linear regression to the true standard concentrations
    measured_stds = [std2_at_time, std3_at_time, std4_at_time]
    true_stds = [ref2, ref3, ref4]  # true concentrations of STD2..STD4
    slope, intercept, *_ = stats.linregress(measured_stds, true_stds)

    # linregress(x, y) fits y = slope * x + intercept. With x = measured and
    # y = true the fit already maps a measured value onto the true scale, so it
    # must NOT be inverted before being applied (inverting would push the
    # reading away from the standards instead of towards them).
    return row['CO2 um/m'] * slope + intercept

# Loop through each row in the input dataset
for row_index, row_data in log_progress(input_data.iterrows(), every=10, size=input_data.shape[0], name='Calibrating'):

    # Only seawater (EQU) and atmospheric (ATM) measurements are calibrated;
    # every other row type keeps NaN in xco2_corr
    if row_data['Type'].startswith(('EQU', 'ATM')):

        # Write through .at so the value lands in input_data itself.
        # Chained indexing (input_data['xco2_corr'][row_index] = ...) may write
        # to a temporary copy and triggers pandas' SettingWithCopyWarning.
        input_data.at[row_index, 'xco2_corr'] = calibrate(row_index, row_data)

# display data, first 100 rows
input_data[['Timestamp', 'CO2 um/m', 'xco2_corr']].head(100)


### Cell (3) - check calibration
By plotting the difference between the measured and calibrated xCO₂ one can easily see the magnitude of the adjustment that was applied to the data set. When the system is running optimally the adjustments shouldn't exceed 5 ppm. But adjustments of more than 5 ppm don't necessarily mean that the data are bad. They just need further examination.

In [None]:
# Raw reading minus calibrated value = size of the adjustment made by the calibration.
# The third argument is presumably the axis label (time_series comes from the helper
# module); it now says "Calibrated" to match what is actually plotted.
calibration_plot = time_series(
    input_data['Timestamp'],
    input_data['CO2 um/m'] - input_data['xco2_corr'],
    'Measured - Calibrated CO₂ [ppm]')

show(calibration_plot)

### Cell (4) - quick check of data
First one performs an automatic range check that captures only clear outliers (like salinity > 50). These data could be flagged automatically as bad or questionable.  
After the automatic range check we should quickly check the data set for erroneous measurements. These can be due to a malfunction of the whole system or just some spikes in the data. The next cell plots several variables that are relevant for the following fCO₂ calculations. Please use the drop-down menu to click through the variables.  
Below the plot you find a table with some explanations what to look for.

In [None]:
# Variables offered in the drop-down menu; the first entry is the one plotted initially
selectable_vars = [
    'equ temp', 'Intake Temp', 'Salinity',
    'atm press', 'WBPressure', 'equ press',
    'licor flow', 'vent flow', 'equ pump', 'H2O flow',
    'licor temp', 'licor press',
]

# The whole data frame is handed to the plot so any of its columns can be selected
plotsource = ColumnDataSource(input_data)

# Time series scatter of the initially selected variable
p = figure(plot_width=600, plot_height=600, x_axis_type='datetime', min_border_left=50)
circle = p.circle('Timestamp', selectable_vars[0], size=5, source=plotsource)

# JavaScript callback: point the glyph's y field at the column chosen in the menu
update_plot = CustomJS(args=dict(circle=circle), code="""
    circle.glyph.y = {field: cb_obj.value};
  """)

# Drop-down menu wired to the callback
column_select = Select(title='Variable', value=selectable_vars[0], options=selectable_vars)
column_select.js_on_change('value', update_plot)

# Menu on top, plot below
show(column(column_select, p))


| Variable | long name |Notes |
| :- | :-| :-|
| equ temp | Equilibrator temperature | Compare the equ temp with the intake temp (intake temperature or SST). The equ temp should be higher than the intake (might be different in the low latitudes). They shouldn't differ more than 1.5 degC. This could mean too low water flow etc. |
| Intake temp | Intake temperature | see equ temp |
| Salinity | Salinity | Check if the salinity is in the expected range. Since its impact on the CO₂ calculation is rather small we don't need high precision. |
| atm press | atmospheric pressure | Check for major outlier. Compare to other pressures of the system. Mostly it's smaller than the pressure next to the instrument when the instrument is inside a ship. The inside of ships has often a higher pressure than the outside (fans, A/C). Also the height of the atmospheric pressure sensor needs to be taken into account. |
| WBPressure | Wet box pressure | Pressure measurement next to the instrument. See comments at atm press. |
| equ press | Equilibrator pressure | Pressure inside the equilibrator. This can be either an absolute measurement inside the equilibration chamber or a differential measurement between the equilibration chamber and outside. The differential pressure gives the possibility for additional information. It should be at a more or less constant value (+- 0.4 mbar) otherwise it can indicate leaks or blockages. |
| licor flow | gas flow in Licor | Check if the gas flows are at a constant (+- 20 ml/min) value. |
| vent flow | Gas flow at equilibrator vent | Since the equilibrator is open to the atmosphere, air can flow into or out of the system. A large change in vent flow also indicates leakages or blockages. |
| equ pump | equilibrator pump speed | The GO system can adjust the pump speed of the equilibrator pump and will adjust it when the licor flow is below/above certain limits. It is an arbitrary number, but changes can again indicate leakages or blockages. |
| H2O flow | water flow | One needs to know the lower flow limit until the equilibration works fine. This is instrument specific. |
| licor temp | Licor cell temperature | Just for information. If the temperature increases too much the Licor will stop working. |
| licor press | Licor cell pressure | Strong pressure gradients can be an indication of leakages or blockages. |




### Cell (5) - position data
Check the position data. Here we only plot the cruise track.

In [None]:
# Extract the coordinates as a concrete list of (lat, lon) pairs.
# Rows without a position are skipped so they cannot break the track or the
# map centre, and a list (unlike a bare zip iterator) can be read more than once.
track = input_data[['latitude', 'longitude']].dropna()
coord = list(zip(track['latitude'], track['longitude']))

# Create the map, centred on the middle of the track's bounding box
lat_min, lat_max = track['latitude'].min(), track['latitude'].max()
lon_min, lon_max = track['longitude'].min(), track['longitude'].max()
mid_lat = lat_min + ((lat_max - lat_min) / 2)
mid_lon = lon_min + ((lon_max - lon_min) / 2)
myMap = folium.Map(location=[mid_lat, mid_lon], zoom_start=5)

# Add dataframe coordinates
folium.PolyLine(locations=coord, color='black').add_to(myMap)
myMap

### Cell (6) - correcting atm. pressure
Very often the atmospheric pressure measurement onboard ships is taken way above the sea level. Since we are interested in processes at the sea level we need to correct this measurement to sea level (World Meteorological Organization / CIMO/ET-Stand-1/Doc. 10 (20.XI.2012), https://www.wmo.int/pages/prog/www/IMOP/meetings/SI/ET-Stand-1/Doc-10_Pressure-red.pdf):

\begin{equation*}
P_{atm, sealevel} = atm\ press * \left(1 - \frac{0.0065 * h}{SST + 273.15 + 0.0065 * h}\right)^{-5.257}
\end{equation*}  

where $h$ is the height of the measurement in metres and $SST$ is the seawater temperature (we assume air temperature = water temperature, the temperature effect is only 0.02 mbar per °C).


In [None]:
# correcting atm. pressure to sea level
def press_at_sea_level(measured_pressure, sst, height):
    """Reduce a pressure measured at `height` to sea level.

    Implements
    ``P * (1 - 0.0065*h / (T + 0.0065*h + 273.15)) ** -5.257``.

    Parameters
    ----------
    measured_pressure
        Pressure at the sensor; the result has the same unit.
    sst
        Temperature in °C (273.15 is added inside the formula).
    height
        Height of the pressure sensor above sea level in metres.

    Returns
    -------
    The pressure reduced to sea level.
    """
    temperature_drop = 0.0065 * height
    fraction = temperature_drop / (sst + temperature_drop + 273.15)
    return measured_pressure * (1 - fraction) ** -5.257

# Height of the atmospheric pressure sensor above sea level in m.
# NOTE(review): the metadata table lists 'atm press' as measured 40 m above sea
# level, which disagrees with the 10 m used here - confirm the correct height.
atm_sensor_height = 10

# Add a column for the adjusted pressure (computed for all rows at once)
input_data['P_atm_sealevel'] = press_at_sea_level(
    input_data['atm press'], input_data['Intake Temp'], atm_sensor_height)

# display data, first 100 rows
input_data[['Timestamp', 'Intake Temp', 'atm press', 'P_atm_sealevel']].head(100)


### Calculate pCO₂ and fCO₂
The corrected CO₂ value (`xco2_corr`) is xCO₂ in dry air at the temperature of equilibration. We need to convert this to pCO₂ in water at the sea surface temperature. And since we have measurements for both, seawater and atmospheric air, we have to do it for both slightly different. The subscript $equ$ stands for seawater measurement and the subscript $atm$ for the atmospheric measurements.

### Cell (7) - xCO₂(dry) to pCO₂(dry)

First we use the corrected xCO₂ and the pressure inside the equilibrator (equ) or at sealevel (atm) to calculate the dry pCO₂. This instrument uses a differential pressure sensor inside the equilibrator. The column in  the data file is called (a bit misleading) 'equ press' but in the following calculation we call it $P_{diff}$. The differential pressure must be subtracted from a reference pressure sensor (WB pressure) and then converted to atmospheres:


$$P_{equ} = \left(P_{WB} - P_{diff}\right)\times100\times9.86923266716013\text{E-6 [atm]}$$ 

The pressure $P_{equ}$ and $P_{atm, sealevel}$ can then be used to calculate $pCO_2(dry)$:

$$\left(pCO_2\right)_{equ}^{dry} = \left(xCO_2\right)^{dry} \times P_{equ}\text{ [µatm]}$$
$$\left(pCO_2\right)_{atm}^{dry} = \left(xCO_2\right)^{dry} \times P_{atm, sealevel}\text{ [µatm]}$$


In [None]:
# Create new columns
input_data['pequ'] = np.nan
input_data['pco2_dry'] = np.nan

# Pressures are recorded in mbar: x 100 gives Pa, x pa_to_atm gives atm (1 atm = 101325 Pa)
pa_to_atm = 0.00000986923266716013

# Row selectors for seawater (EQU) and atmospheric (ATM) measurements
is_equ = input_data['Type'].str.startswith('EQU', na=False)
is_atm = input_data['Type'].str.startswith('ATM', na=False)

# Seawater rows: equilibrator pressure = wet box press - diff press, then pCO2 = xCO2 * P
equ_rows = input_data.loc[is_equ]
pequ = (equ_rows['WBPressure'] - equ_rows['equ press']) * 100 * pa_to_atm
input_data.loc[is_equ, 'pequ'] = pequ
input_data.loc[is_equ, 'pco2_dry'] = equ_rows['xco2_corr'] * pequ                  # equ pCO2 calculation

# Atmospheric rows: use the sea level pressure instead of the equilibrator pressure
atm_rows = input_data.loc[is_atm]
input_data.loc[is_atm, 'pco2_dry'] = (
    atm_rows['xco2_corr'] * atm_rows['P_atm_sealevel'] * 100 * pa_to_atm)          # atm pCO2 calculation

# display data, first 100 rows
input_data[['Timestamp', 'Type', 'equ press', 'WBPressure', 'pequ', 'P_atm_sealevel', 'CO2 um/m', 'xco2_corr', 'pco2_dry']].head(100)

### Cell (8) - pCO₂(dry) to pCO₂(wet)
To convert the dry into a wet pCO₂ we need to subtract the water vapor pressure pH₂O. 

$$\left(pCO_2\right)_{equ}^{wet} = \left(xCO_2\right)_{equT}^{dry}\left[ P_{equ} - \left(pH_2O\right)_{equ}\right]\text{ [µatm]}$$
$$\left(pCO_2\right)_{atm}^{wet} = \left(xCO_2\right)_{equT}^{dry}\left[ P_{atm,sealevel} - \left(pH_2O\right)_{atm}\right]\text{ [µatm]}$$

where $pH_2O$ is the water vapour pressure at the sea surface salinity and equilibrator temperature. The water vapour pressure can be calculated from salinity ($S$) and temperature (Weiss and Price, 1980). As mentioned above we use the temperature inside the equilibrator $equT_K$ for seawater $pCO_2$ and $SST_K$ for the atmospheric $pCO_2$, where the temperature is given in Kelvin.
$$
\left(pH_2O\right)_{equ} = exp\left[ 24.4543 - 67.4509\left( \frac{100}{equT_K} \right) - 4.8489 ln\left( \frac{equT_K}{100} \right) - 0.000544S \right]\text{ [atm]}
$$
$$
\left(pH_2O\right)_{atm} = exp\left[ 24.4543 - 67.4509\left( \frac{100}{SST_K} \right) - 4.8489 ln\left( \frac{SST_K}{100} \right) - 0.000544S \right]\text{ [atm]}
$$


In [None]:
def calc_ph2o(temp, salinity):
    """Water vapour pressure from temperature and salinity.

    Evaluates
    ``exp(24.4543 - 67.4509*(100/T) - 4.8489*ln(T/100) - 0.000544*S)``
    with ``T = temp + 273.15``.

    Parameters
    ----------
    temp : float
        Temperature in °C.
    salinity : float
        Salinity.

    Returns
    -------
    float
        The water vapour pressure given by the formula.
    """
    kelvin = temp + 273.15

    # Build the exponent term by term, in the same order as the formula
    exponent = 24.4543
    exponent -= 67.4509 * (100 / kelvin)
    exponent -= 4.8489 * math.log(kelvin / 100)
    exponent -= 0.000544 * salinity

    return math.exp(exponent)

# Create columns
input_data['pco2_wet'] = np.nan

for row_index, row_data in log_progress(input_data.iterrows(), every=100, size=input_data.shape[0], name='Calculating'):
    if row_data['Type'].startswith('EQU'):
        # Seawater: vapour pressure at equilibrator temperature, total pressure = equilibrator pressure
        ph2o_equ = calc_ph2o(row_data['equ temp'], row_data['Salinity'])
        input_data.at[row_index, 'pco2_wet'] = row_data['xco2_corr'] * (row_data['pequ'] - ph2o_equ)

    elif row_data['Type'].startswith('ATM'):
        # Atmosphere: vapour pressure at intake temperature, total pressure = sea level pressure.
        # P_atm_sealevel is in mbar: x 100 gives Pa, x 9.869...e-6 gives atm.
        ph2o_atm = calc_ph2o(row_data['Intake Temp'], row_data['Salinity'])
        P_atm_sealevel_atmos = row_data['P_atm_sealevel'] * 100 * 0.00000986923266716013
        input_data.at[row_index, 'pco2_wet'] = row_data['xco2_corr'] * (P_atm_sealevel_atmos - ph2o_atm)

# display data, first 100 rows
input_data[['Timestamp', 'Type', 'equ temp', 'pequ', 'P_atm_sealevel', 'xco2_corr', 'pco2_dry', 'pco2_wet']].head(100)


### Cell (9) - pCO₂ to fCO₂

CO₂ is a non-ideal gas, so we often use fCO₂ to account for its non-ideal behaviour. We calculate fCO₂ from pCO₂ as follows:

$$\left(fCO_2\right)_{equ}^{wet} = \left(pCO_2\right)_{equ}^{wet} \: exp \left\{ \frac{\left[ B \left( CO_2, equT_K \right) + 2 \left( 1 - \left(xCO_2\right)^{dry} \right)^2 \times \delta \left( CO_2, equT_K \right) \right] \times P_{equ} }{R\times equT_K} \right\}
$$
$$\left(fCO_2\right)_{atm}^{wet} = \left(pCO_2\right)_{atm}^{wet} \: exp \left\{ \frac{\left[ B \left( CO_2, SST_K \right) + 2 \left( 1 - \left(xCO_2\right)^{dry} \right)^2 \times \delta \left( CO_2, SST_K \right) \right] \times P_{atm,sealevel} }{R\times SST_K} \right\}
$$

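where $R$ is the gas constant (82.0575 cm³ atm mol⁻¹ K⁻¹), $xCO_2$ is expressed as a mole fraction (ppm × 10⁻⁶), $T$ is the temperature in Kelvin, the pressure is in atm, and $B$ and $\delta$ are the virial coefficients: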
\begin{equation*}
B \left( CO_2, T \right) = -1636.75 + 12.0408\,T - 3.27957\times10^{-2}\,T^2 + 3.16528\times10^{-5}\,T^3
\end{equation*}


\begin{equation*}
\delta \left( CO_2, T \right) = 57.7 - 0.118\,T
\end{equation*}




In [None]:
# Calculate fCO2
# Start with an all-NaN column; it is filled in row by row further down
input_data['fco2_wet'] = np.nan

def calc_fco2(pco2_wet, xco2_corr, temp, pressure):
    """Convert pCO2 to fCO2 using the virial coefficients B and delta.

    Evaluates
    ``pCO2 * exp{ [B(T) + 2 * (1 - x)^2 * delta(T)] * P / (R * T) }``
    with T in Kelvin.

    Parameters
    ----------
    pco2_wet : float
        pCO2 to be converted.
    xco2_corr : float
        Calibrated xCO2 in ppm; it is scaled by 1e-6 to a mole fraction here.
    temp : float
        Temperature in °C (converted to Kelvin internally).
    pressure : float
        Total pressure in atm.

    Returns
    -------
    float
        fCO2, in the same unit as `pco2_wet`.
    """
    kelvin = temp + 273.15

    # Virial coefficients; both polynomials are functions of the temperature in
    # Kelvin. (delta was previously evaluated with the Celsius value.)
    B = -1636.75 + 12.0408 * kelvin - 0.0327957 * math.pow(kelvin, 2) + (3.16528 * 1e-5) * math.pow(kelvin, 3)
    delta = 57.7 - 0.118 * kelvin

    # Gas constant R = 82.0575 cm3 atm mol-1 K-1
    gas_constant = 82.0575

    mole_fraction = xco2_corr * 1e-6
    exponent = ((B + 2 * math.pow(1 - mole_fraction, 2) * delta) * pressure) / (gas_constant * kelvin)
    return pco2_wet * math.exp(exponent)

for row_index, row_data in log_progress(input_data.iterrows(), every=100, size=input_data.shape[0], name='Calculating'):
    if row_data['Type'].startswith('EQU'):
        # Seawater: equilibrator temperature and equilibrator pressure
        input_data.at[row_index, 'fco2_wet'] = calc_fco2(
            row_data['pco2_wet'], row_data['xco2_corr'], row_data['equ temp'], row_data['pequ'])
    elif row_data['Type'].startswith('ATM'):
        # Atmosphere: intake temperature and sea level pressure.
        # P_atm_sealevel is in mbar: x 100 gives Pa, x 9.869...e-6 gives atm.
        P_atm_sealevel_atmos = row_data['P_atm_sealevel'] * 100 * 0.00000986923266716013
        input_data.at[row_index, 'fco2_wet'] = calc_fco2(
            row_data['pco2_wet'], row_data['xco2_corr'], row_data['Intake Temp'], P_atm_sealevel_atmos)


# display data, first 100 rows
input_data[['Timestamp', 'Type', 'Intake Temp', 'equ temp', 'pequ', 'P_atm_sealevel', 'xco2_corr', 'pco2_wet', 'fco2_wet']].head(100)


### Cell (10) - correct for $\Delta T$

Finally we must convert pCO₂ from equilibrator temperature to sea surface temperature:

\begin{equation*}
\left(pCO_2\right)_{SST}^{wet} = \left(pCO_2\right)_{equT}^{wet} \times exp \left\{ 0.0423 \left(SST - equT \right) \right\}
\end{equation*}

with $SST$ and $equT$ both in K (only their difference enters, so values in °C give the same result).


And finally we can calculate fCO₂ at SST as before:

\begin{equation*}
\left(fCO_2\right)_{SST}^{wet} = \left(fCO_2\right)_{equT}^{wet} \times exp \left\{ 0.0423 \left(SST_k - equT_k \right) \right\}
\end{equation*}

In [None]:
# in situ data

# Only seawater (EQU) rows are corrected; every other row gets NaN
is_equ = input_data['Type'].str.startswith('EQU', na=False)

# Temperature correction after Takahashi: exp(0.0423 * (SST - equT)).
# Only the temperature difference matters, so the °C columns can be used directly.
temp_factor = np.exp(0.0423 * (input_data['Intake Temp'] - input_data['equ temp']))

# create new columns (computed for all rows at once, then masked to EQU rows)
input_data['pco2_wet_sst'] = (input_data['pco2_wet'] * temp_factor).where(is_equ)
input_data['fco2_wet_sst'] = (input_data['fco2_wet'] * temp_factor).where(is_equ)

# display data, first 100 rows
input_data[['Timestamp', 'Type', 'pco2_wet', 'fco2_wet', 'pco2_wet_sst', 'fco2_wet_sst']].head(100)

### Cell (11) - QC
The tool below is the same as in cell(4). You have the ability to plot different variables including the one we recently calculated. Play around with it and use the tool to QC the data set. At this point we can't manipulate the dataset. But write down your findings and ideas how to make this a SOCAT-ready data set.

In [None]:
# Rebuild the data source so the columns calculated since the first plot are included
plotsource = ColumnDataSource(input_data)

# Variables offered in the drop-down menu (each listed once); the first one is plotted initially
selectable_vars = ['equ temp', 'Intake Temp', 'Salinity', 'atm press', 'WBPressure','equ press', 'pequ', 'P_atm_sealevel',
                   'licor flow', 'vent flow','H2O flow', 'equ pump',
                   'xco2_corr', 'pco2_wet', 'fco2_wet', 'pco2_wet_sst', 'fco2_wet_sst']

column_select = Select(title='Variable', value=selectable_vars[0], options=selectable_vars)

# Time series scatter of the currently selected variable
p = figure(plot_width=600, plot_height=600, x_axis_type='datetime', min_border_left=50)
circle = p.circle('Timestamp', column_select.value, size=5, source=plotsource)

# JavaScript callback: point the glyph's y field at the column chosen in the menu
update_plot = CustomJS(args=dict(circle=circle), code="""
    circle.glyph.y = {field: cb_obj.value};
  """)

column_select.js_on_change('value', update_plot)

# Menu on top, plot below
show(column(column_select, p))
