# Notebook to analyse measured concentrations from MARGA
Read data from Excel and CSV files into pandas dataframes.

In [None]:
# Import modules
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
# Set some parameters
# Directory containing the observation files. The loader functions build each
# path as datadir + filename, so the trailing '/' must be kept.
# NOTE(review): this is an absolute, user-specific path - adjust it when running elsewhere.
datadir = '/data/users/bdrummon/obs/nitrate_measurements_JanMay2020/'
# Auchencorth Moss workbook (read by load_auchencorth_moss)
auchencorth_filename = '2020filtered_Metoffice.xlsx'
# Chilbolton January-March CSV (read by load_chilbolton_observatory1)
chilbolton_filename1 = 'Chilbolton_JanMarch2020.csv'
# Chilbolton April-May workbook (read by load_chilbolton_observatory2)
chilbolton_filename2 = 'Prov_CHBO_MARGA_April-May_2020.xlsx'

### Function to load Auchencorth Moss data
A function that loads data from the Auchencorth Moss Excel file and returns a dataframe.

In [None]:
def load_auchencorth_moss():
    """Read the Auchencorth Moss workbook into a time-indexed dataframe.

    The '2020_filtered' sheet of ``datadir + auchencorth_filename`` is read,
    cut down to the timestamp column and six species columns, and the PM2.5
    columns are renamed to the ``PM2p5_*`` convention used in the rest of
    the notebook.

    Returns
    -------
    pandas.DataFrame
        Float columns PM2p5_NH4, PM2p5_NO3, PM2p5_SO4, NH3, HNO3 and SO2,
        indexed by 'time' (datetime).
    """
    wanted_columns = ['Time_date', 'NH4_PM2_5', 'NO3_PM2_5', 'SO4_PM2_5', 'NH3', 'HNO3', 'SO2']
    new_names = {
        'Time_date' : 'time',
        'NH4_PM2_5' : 'PM2p5_NH4',
        'NO3_PM2_5' : 'PM2p5_NO3',
        'SO4_PM2_5' : 'PM2p5_SO4'
    }

    sheet = pd.read_excel(datadir+auchencorth_filename, sheet_name='2020_filtered')

    return (
        sheet[wanted_columns]
        .rename(columns=new_names)
        # The row labelled 0 holds the units rather than data
        .drop(0)
        # Timestamps must be parsed before they become the index
        .assign(time=lambda frame: pd.to_datetime(frame['time']))
        .set_index('time')
        .astype(float)
    )

### Functions to load Chilbolton data
- The Chilbolton data is split over two files - one downloaded from UK Air and one obtained directly from CEH (unratified data) - these are in different formats so we read them separately and then combine

In [None]:
def my_to_datetime(date_str):
    """Parse a 'YYYY-MM-DDHH:MM:SS' string, accepting hour 24 as midnight.

    The data marks midnight as 24:00:00, but pandas only handles hours
    0 -> 23. A stamp with hour 24 on day D is therefore parsed as hour 00
    on day D and then moved forward by one day, giving 00:00:00 on day D+1.

    Parameters
    ----------
    date_str : str
        Date and time joined with no separator, e.g. '2020-01-3124:00:00'.

    Returns
    -------
    pandas.Timestamp
    """
    day_part, hour_part, remainder = date_str[:10], date_str[10:12], date_str[12:]
    rolls_over = hour_part == '24'

    if rolls_over:
        # Swap the unparseable hour for 00; the missing day is added back below
        date_str = day_part + '00' + remainder

    stamp = pd.to_datetime(date_str, format='%Y-%m-%d%H:%M:%S')
    return stamp + dt.timedelta(days=1) if rolls_over else stamp

In [None]:
def load_chilbolton_observatory1():
    """Load the first Chilbolton file (CSV) into a time-indexed dataframe.

    Reads ``datadir + chilbolton_filename1``, keeps the 'Date' and 'Time'
    columns plus the six species columns named in ``mapper``, and renames
    the species to the short names used in the rest of the notebook. Date
    and time are joined and parsed with ``my_to_datetime`` (which handles
    the 24:00:00 midnight convention) to form the index.

    Any row in which at least one species is flagged 'No data' is discarded,
    and the final remaining row is dropped because its time is repeated in
    the April-May dataset.

    Returns
    -------
    pandas.DataFrame
        Float columns PM2p5_NH4, PM2p5_NO3, PM2p5_SO4, HNO3, NH3 and SO2,
        indexed by 'time' (datetime).
    """
    mapper = {
        'ammonium in PM2.5' : 'PM2p5_NH4',
        'nitrate in PM2.5' : 'PM2p5_NO3',
        'sulphate in PM2.5' : 'PM2p5_SO4',
        'gaseous nitric acid' : 'HNO3',
        'gaseous ammonia' : 'NH3',
        'gaseous sulphur dioxide' : 'SO2'
    }

    # Load file into dataframe.
    # skipfooter is not supported by the default C parser; naming the python
    # engine explicitly avoids the ParserWarning pandas emits when it has to
    # fall back on its own.
    df = pd.read_csv(datadir+chilbolton_filename1, header=4, skipfooter=1,
                     engine='python')

    # Get subset of columns and rename
    df = df[['Date', 'Time']+list(mapper.keys())].rename(columns=mapper)

    # Combine date and time (no separator, as my_to_datetime expects) and convert to datetime
    df['time'] = df['Date'] + df['Time']
    df['time'] = df.time.apply(my_to_datetime)
    df = df.drop(columns=['Date', 'Time'])

    # Make time the index
    df = df.set_index('time')

    # Replace missing data with NaN and discard rows with any missing species
    df = df.replace('No data', np.nan)
    df = df.dropna()

    # Drop the last row (as this time is repeated in the April-May dataset)
    df = df[:-1]

    # Convert to float
    df = df.astype(float)

    return df

In [None]:
def load_chilbolton_observatory2():
    """Load the second Chilbolton file (Excel) into a time-indexed dataframe.

    The 'April_May_2020' sheet of ``datadir + chilbolton_filename2`` holds
    one row per measurement, with the species identified by 'parameter_id'.
    For each species in ``mapper`` the matching rows are selected, the
    measurement start date and time are combined into a datetime index, and
    the measurements become one column of the returned dataframe.

    Values equal to the missing-data flag (-999) are replaced with NaN.
    The first species assigned fixes the index of the output; later species
    are aligned to it.

    Returns
    -------
    pandas.DataFrame
        Float columns PM2p5_NH4, PM2p5_NO3, PM2p5_SO4, HNO3, NH3 and SO2,
        indexed by 'time' (datetime).
    """
    mapper = {
        'm_NH4_2.5' : 'PM2p5_NH4',
        'm_NO3_2.5' : 'PM2p5_NO3',
        'm_SO4_2.5' : 'PM2p5_SO4',
        'm_HNO3_g' : 'HNO3',
        'm_NH3_g' : 'NH3',
        'm_SO2_g' : 'SO2'
    }
    source_columns = ['measurement start date', 'measurement start time', 'measurement']

    # Load file into dataframe
    dfin = pd.read_excel(datadir+chilbolton_filename2, sheet_name='April_May_2020')

    # Create new dataframe
    df = pd.DataFrame()

    # Loop over requested species
    for species, column in mapper.items():

        # Rows for the current species. The explicit copy makes dfsp an
        # independent frame, so adding the 'time' column below does not
        # raise SettingWithCopyWarning.
        dfsp = dfin.loc[dfin['parameter_id'] == species, source_columns].copy()

        # Combine the date and time columns (no separator) and convert to datetime format
        stamps = dfsp['measurement start date'].astype(str) + dfsp['measurement start time'].astype(str)
        dfsp['time'] = pd.to_datetime(stamps, format='%Y-%m-%d%H:%M:%S')

        # Drop redundant columns and make time the index
        dfsp = dfsp.drop(columns=['measurement start date', 'measurement start time'])
        dfsp = dfsp.set_index('time')

        # Work in float (as the other loaders do) so that NaN can be stored
        values = dfsp['measurement'].astype(float)

        # Replace missing value flags (-999 in this dataset) with NaN
        values = values.mask(np.isclose(values, -999.))

        # Add new column to dataframe
        df[column] = values

    return df

In [None]:
def load_chilbolton_observatory():
    """Load both Chilbolton files and join them into a single timeseries.

    Returns
    -------
    pandas.DataFrame
        The January-March rows from ``load_chilbolton_observatory1``
        followed by the April-May rows from ``load_chilbolton_observatory2``.
    """
    # Load January to March data
    jan_to_mar = load_chilbolton_observatory1()

    # Load April to May data
    apr_to_may = load_chilbolton_observatory2()

    # Combine for a single timeseries. pd.concat replaces DataFrame.append,
    # which was deprecated in pandas 1.4 and removed in pandas 2.0.
    return pd.concat([jan_to_mar, apr_to_may])

# Load Chilbolton and Auchencorth data and calculate the gas ratio

In [None]:
# Load each site's record with the loader functions defined above and average
# it onto a regular time grid in the same step.
# The resample rule sets the averaging period, e.g. 'H' hourly (raw data
# timestep), 'D' daily, 'W' weekly.
chil_df = load_chilbolton_observatory().resample('H').mean()
auch_df = load_auchencorth_moss().resample('H').mean()

# Option to filter the data by a start and end date (e.g. filtering for April to match the model simulations)
#start_date = pd.to_datetime('2020-04-0100:00:00',format='%Y-%m-%d%H:%M:%S')
#end_date   = pd.to_datetime('2020-05-0100:00:00',format='%Y-%m-%d%H:%M:%S')
#chil_df = chil_df[chil_df.index >= start_date]
#chil_df = chil_df[chil_df.index < end_date]
#auch_df = auch_df[auch_df.index >= start_date]
#auch_df = auch_df[auch_df.index < end_date]

# Check for any negative values in the data.

In [None]:
# Report, site by site, every species column that contains a negative value.
species_columns = ('PM2p5_SO4', 'PM2p5_NO3', 'PM2p5_NH4', 'NH3', 'HNO3', 'SO2')
sites_to_check = (
    ('Chilbolton Observatory', chil_df),
    ('Auchencorth Moss', auch_df),
)

for site_label, site_df in sites_to_check:
    print('Checking ' + site_label + ' Data')
    for column in species_columns:
        # nanmin ignores missing values, so only real measurements are tested
        if np.nanmin(site_df[column]) < 0 :
            print('   - Negative values in ' + column)

# Plot timeseries of the Chilbolton data.

In [None]:
# Plot simple timeseries of the Chilbolton data: one figure per species.
# Looping over (column, title) pairs replaces six copy-pasted calls and keeps
# the Axes repr of the last call out of the cell output.
for column, title in (
    ('PM2p5_SO4', 'Sulphate'),
    ('PM2p5_NO3', 'Nitrate'),
    ('PM2p5_NH4', 'Ammonium'),
    ('NH3', 'Ammonia'),
    ('HNO3', 'Nitric Acid'),
    ('SO2', 'Sulphur Dioxide'),
):
    chil_df.plot(y=column, title=title)

# Plot timeseries of the Auchencorth data.

In [None]:
# Plot simple timeseries of the Auchencorth data: one figure per species.
# Looping over (column, title) pairs replaces six copy-pasted calls and keeps
# the Axes repr of the last call out of the cell output.
for column, title in (
    ('PM2p5_SO4', 'Sulphate'),
    ('PM2p5_NO3', 'Nitrate'),
    ('PM2p5_NH4', 'Ammonium'),
    ('NH3', 'Ammonia'),
    ('HNO3', 'Nitric Acid'),
    ('SO2', 'Sulphur Dioxide'),
):
    auch_df.plot(y=column, title=title)

# Calculate the gas ratio

In [None]:
# Species that must all be present at a timestep for the gas ratio to be calculated.
required_species = ['PM2p5_NH4', 'PM2p5_SO4', 'PM2p5_NO3', 'HNO3', 'NH3']

# Filter the dataframes to leave only timesteps where we have data for all required species.
# The explicit .copy() makes each filtered frame independent, so adding the
# gas ratio columns below does not raise SettingWithCopyWarning.
chil_df = chil_df[np.isfinite(chil_df[required_species]).all(axis=1)].copy()
auch_df = auch_df[np.isfinite(auch_df[required_species]).all(axis=1)].copy()

# Convert the units from ug/m3 to ppb.
# NOTE(review): the constants look like SI values (K, Pa, J/(mol K)) - confirm.
STD_TEMP = 273.15
STD_PRES = 1e5
R_GAS = 8.314


def _ugm3_to_ppb(conc, molar_mass):
    """Convert a concentration in ug/m3 to ppb for the given molar mass.

    Applies conc * 1e3 * R_GAS * STD_TEMP / (STD_PRES * molar_mass), using
    the standard temperature and pressure defined above; molar_mass is
    presumably in g/mol.
    """
    return conc * 1e3 * ( (R_GAS * STD_TEMP) / (STD_PRES * molar_mass) )


# The second argument is the molar mass used for each species.
chil_nh3  = _ugm3_to_ppb(chil_df['NH3'], 17.031)
chil_hno3 = _ugm3_to_ppb(chil_df['HNO3'], 63.01)
chil_nh4  = _ugm3_to_ppb(chil_df['PM2p5_NH4'], 18.038)
chil_so4  = _ugm3_to_ppb(chil_df['PM2p5_SO4'], 96.06)
chil_no3  = _ugm3_to_ppb(chil_df['PM2p5_NO3'], 62.0049)
# Gas ratio: (NH3 + NH4 - 2*SO4) / (HNO3 + NO3), all in ppb
chil_df['GR_ppb'] = (chil_nh3 + chil_nh4 - (2 * chil_so4)) / (chil_hno3 + chil_no3)

auch_nh3  = _ugm3_to_ppb(auch_df['NH3'], 17.031)
auch_hno3 = _ugm3_to_ppb(auch_df['HNO3'], 63.01)
auch_nh4  = _ugm3_to_ppb(auch_df['PM2p5_NH4'], 18.038)
auch_so4  = _ugm3_to_ppb(auch_df['PM2p5_SO4'], 96.06)
auch_no3  = _ugm3_to_ppb(auch_df['PM2p5_NO3'], 62.0049)
auch_df['GR_ppb'] = (auch_nh3 + auch_nh4 - (2 * auch_so4)) / (auch_hno3 + auch_no3)

# Calculate the same ratio for both sites directly from the unconverted (ug/m3) concentrations.
chil_df['GR_ugm3'] = (chil_df['NH3'] + chil_df['PM2p5_NH4'] - (2 * chil_df['PM2p5_SO4'])) / (chil_df['HNO3'] + chil_df['PM2p5_NO3'])
auch_df['GR_ugm3'] = (auch_df['NH3'] + auch_df['PM2p5_NH4'] - (2 * auch_df['PM2p5_SO4'])) / (auch_df['HNO3'] + auch_df['PM2p5_NO3'])

# Plot the gas ratio data for Chilbolton Observatory.

In [None]:
# Plot the Chilbolton gas ratio data.
# An explicit figure/axes pair is used so the plot never lands on whatever
# figure pyplot currently has open (which only happens to be a fresh one
# under the inline notebook backend).
fig, ax = plt.subplots()
chil_plot = ax.plot(chil_df.index, chil_df['GR_ppb'], lw=1, c='#1f77b4')

# Horizontal reference lines at gas ratio 0 and 1, spanning the data period
for level, colour in ((0, '#2ca02c'), (1, '#ff7f0e')):
    chil_plot = ax.plot([chil_df.index[0], chil_df.index[-1]], [level, level], lw=1, c=colour)

ax.set_ylim(-2, 50)
ax.set_title('Chilbolton Observatory')
ax.set_ylabel('Gas Ratio')

# NOTE(review): absolute, user-specific output directory - adjust when running elsewhere.
plot_directory = '/home/h01/ersmith/COVID-19/AQ_CV19_analysis/'
fig.savefig(plot_directory+'Chilbolton_Observatory.png', dpi=150)

# Plot the gas ratio data for Auchencorth Moss.

In [None]:
# Plot the Auchencorth gas ratio data.
# An explicit figure/axes pair is used so the plot never lands on whatever
# figure pyplot currently has open (which only happens to be a fresh one
# under the inline notebook backend).
fig, ax = plt.subplots()
auch_plot = ax.plot(auch_df.index, auch_df['GR_ppb'], lw=1, c='#1f77b4')

# Horizontal reference lines at gas ratio 0 and 1, spanning the data period
for level, colour in ((0, '#2ca02c'), (1, '#ff7f0e')):
    auch_plot = ax.plot([auch_df.index[0], auch_df.index[-1]], [level, level], lw=1, c=colour)

ax.set_ylim(-2, 50)
ax.set_title('Auchencorth Moss')
ax.set_ylabel('Gas Ratio')

# NOTE(review): absolute, user-specific output directory - adjust when running elsewhere.
plot_directory = '/home/h01/ersmith/COVID-19/AQ_CV19_analysis/'
fig.savefig(plot_directory+'Auchencorth_Moss.png', dpi=150)