# Notebook to read in the observational data from Auchencorth Moss and Chilbolton Observatory.

Import the required modules.

In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt

Define some key parameters.

In [None]:
# Directory holding the per-site, per-year observation workbooks.
datadir = '/data/users/ersmith/Other/COVID-19/Particulate_Matter/Data_Files/'

# Reference conditions and gas constant used when converting ug/m3 to ppb.
std_temp = 273.15  # presumably kelvin -- confirm
std_pres = 1e5     # presumably pascal -- confirm
r_gas = 8.314      # presumably J mol-1 K-1 -- confirm

## Read the Auchencorth Moss data.

In [None]:
# Define the function to load and merge the Auchencorth Moss data.
def load_auchencorth_moss(year):
    """Load and merge one year of Auchencorth Moss observations.

    Reads the five pollutant sheets from '<year>_Auchencorth_Moss.xlsx' in
    ``datadir``, converts each 'ug m-3' column to ppb, aligns the species on a
    shared time index, keeps only the timesteps where every species is finite
    and adds a 'Gas_Ratio' column.

    Parameters
    ----------
    year : str or int
        Year inserted into the workbook file name.

    Returns
    -------
    pandas.DataFrame
        One column per species ('PM2p5_SO4', 'PM2p5_NO3', 'PM2p5_NH4',
        'HNO3', 'NH3') plus 'Gas_Ratio', indexed by timestamps whose year is
        always 2020.
    """

    # Define the file name.
    file_name = '%s_Auchencorth_Moss.xlsx'%(year)

    # Sheet / column name of each species paired with the molar mass used in
    # the unit conversion.
    species_mass = [('PM2p5_SO4', 96.06),
                    ('PM2p5_NO3', 62.0049),
                    ('PM2p5_NH4', 18.038),
                    ('HNO3', 63.01),
                    ('NH3', 17.031)]
    species = [name for name, _ in species_mass]

    # Read every pollutant sheet in one call; a list of sheet names returns a
    # dict of data frames keyed by sheet name.
    sheets = pd.read_excel(datadir+file_name, sheet_name=species)

    # Create a data frame to hold the combined data.
    auch_df = pd.DataFrame()

    # Process the data.
    for name, mass in species_mass:
        sheet = sheets[name]

        # Convert the date and time information to datetime.
        # All data uses the year 2020 to enable them to be plotted on the same graph.
        # The hour is the leading two digits of 'Time' minus one (presumably the
        # sheets label each hour 01-24 -- confirm against the data files).
        stamps = ['2020-%s-%s-%02d'%(str(date)[5:7], str(date)[8:10], int(str(time)[0:2])-1)
                  for date, time in zip(sheet['Date'], sheet['Time'])]
        stamps = pd.to_datetime(stamps, format='%Y-%m-%d-%H')

        # Convert the concentration data to floats, replace any '<x' values with
        # nan and convert units from ug/m3 to ppb.  The values are rebuilt in a
        # new list so the source sheet is never mutated, and np.nan replaces the
        # np.float alias that no longer exists in current NumPy releases.
        volume_per_mass = (r_gas*std_temp)/(std_pres*mass)
        conc = [np.nan if str(value).startswith('<') else float(value) * 1e3 * volume_per_mass
                for value in sheet['ug m-3']]

        # Add the data to the data frame.
        auch_df[name] = pd.Series(conc, index=stamps)

    # Filter to leave only timesteps where we have data for all required species.
    # The copy makes the result an independent frame so the column added below
    # does not write into a slice of the unfiltered frame.
    complete = np.isfinite(auch_df[species]).all(axis=1)
    auch_df = auch_df[complete].copy()

    # Calculate the gas ratio.
    auch_df['Gas_Ratio'] = (auch_df['NH3'] + auch_df['PM2p5_NH4'] - (2 * auch_df['PM2p5_SO4'])) / (auch_df['HNO3'] + auch_df['PM2p5_NO3'])

    # Return the data frame.
    return auch_df

## Read the Chilbolton Observatory data.

In [None]:
# Define the function to load and merge the Chilbolton Observatory data.
def load_chilbolton_observatory(year):
    """Load and merge one year of Chilbolton Observatory observations.

    Reads the five pollutant sheets from '<year>_Chilbolton_Observatory.xlsx'
    in ``datadir``, converts each 'ug m-3' column to ppb, aligns the species
    on a shared time index, keeps only the timesteps where every species is
    finite and adds a 'Gas_Ratio' column.

    Parameters
    ----------
    year : str or int
        Year inserted into the workbook file name.

    Returns
    -------
    pandas.DataFrame
        One column per species ('PM2p5_SO4', 'PM2p5_NO3', 'PM2p5_NH4',
        'HNO3', 'NH3') plus 'Gas_Ratio', indexed by timestamps whose year is
        always 2020.
    """

    # Define the file name.
    file_name = '%s_Chilbolton_Observatory.xlsx'%(year)

    # Sheet / column name of each species paired with the molar mass used in
    # the unit conversion.
    species_mass = [('PM2p5_SO4', 96.06),
                    ('PM2p5_NO3', 62.0049),
                    ('PM2p5_NH4', 18.038),
                    ('HNO3', 63.01),
                    ('NH3', 17.031)]
    species = [name for name, _ in species_mass]

    # Read every pollutant sheet in one call; a list of sheet names returns a
    # dict of data frames keyed by sheet name.
    sheets = pd.read_excel(datadir+file_name, sheet_name=species)

    # Create a data frame to hold the combined data.
    chil_df = pd.DataFrame()

    # Process the data.
    for name, mass in species_mass:
        sheet = sheets[name]

        # Convert the date and time information to datetime.
        # All data uses the year 2020 to enable them to be plotted on the same graph.
        # The hour is the leading two digits of 'Time' minus one (presumably the
        # sheets label each hour 01-24 -- confirm against the data files).
        stamps = ['2020-%s-%s-%02d'%(str(date)[5:7], str(date)[8:10], int(str(time)[0:2])-1)
                  for date, time in zip(sheet['Date'], sheet['Time'])]
        stamps = pd.to_datetime(stamps, format='%Y-%m-%d-%H')

        # Convert the concentration data to floats, replace any '<x' values with
        # nan and convert units from ug/m3 to ppb.  The values are rebuilt in a
        # new list so the source sheet is never mutated, and np.nan replaces the
        # np.float alias that no longer exists in current NumPy releases.
        volume_per_mass = (r_gas*std_temp)/(std_pres*mass)
        conc = [np.nan if str(value).startswith('<') else float(value) * 1e3 * volume_per_mass
                for value in sheet['ug m-3']]

        # Add the data to the data frame.
        chil_df[name] = pd.Series(conc, index=stamps)

    # Filter to leave only timesteps where we have data for all required species.
    # The copy makes the result an independent frame so the column added below
    # does not write into a slice of the unfiltered frame.
    complete = np.isfinite(chil_df[species]).all(axis=1)
    chil_df = chil_df[complete].copy()

    # Calculate the gas ratio.
    chil_df['Gas_Ratio'] = (chil_df['NH3'] + chil_df['PM2p5_NH4'] - (2 * chil_df['PM2p5_SO4'])) / (chil_df['HNO3'] + chil_df['PM2p5_NO3'])

    # Return the data frame.
    return chil_df

## Load the observational data.

In [None]:
# Load each year of Auchencorth Moss data (2016 first, 2020 last).
(auch_df_2016,
 auch_df_2017,
 auch_df_2018,
 auch_df_2019,
 auch_df_2020) = [load_auchencorth_moss(year)
                  for year in ('2016', '2017', '2018', '2019', '2020')]

# Load each year of Chilbolton Observatory data in the same order.
(chil_df_2016,
 chil_df_2017,
 chil_df_2018,
 chil_df_2019,
 chil_df_2020) = [load_chilbolton_observatory(year)
                  for year in ('2016', '2017', '2018', '2019', '2020')]

## Create a multiyear averaged dataset.

In [None]:
# Auchencorth Moss: stack 2016-2019 (every frame is indexed on the year 2020,
# so the same hour from different years lands in one bin) and take the hourly mean.
auch_df_avg = pd.concat(
    [auch_df_2016, auch_df_2017, auch_df_2018, auch_df_2019]
).resample('H').mean()

# Chilbolton Observatory: same stacking and hourly mean.
chil_df_avg = pd.concat(
    [chil_df_2016, chil_df_2017, chil_df_2018, chil_df_2019]
).resample('H').mean()

## Perform any time averaging of the observational data.

Average the data by a chosen frequency: 'H' = hour, 'D' = day, 'W' = week, 'M' = month (the value used in the cell below).

In [None]:
# Define the resampling period.
resample_period = 'M'


def _quartile_summary(frame):
    """Return the (mean, 25th percentile, 75th percentile) of frame per resample bin."""
    binned = frame.resample(resample_period, label='left')
    return binned.mean(), binned.quantile(0.25), binned.quantile(0.75)


# Resample the Auchencorth Moss data.
# The *_min / *_max frames hold the lower and upper quartiles, not the extremes.
auch_df_avg_mean, auch_df_avg_min, auch_df_avg_max = _quartile_summary(auch_df_avg)
auch_df_2016_mean, auch_df_2016_min, auch_df_2016_max = _quartile_summary(auch_df_2016)
auch_df_2017_mean, auch_df_2017_min, auch_df_2017_max = _quartile_summary(auch_df_2017)
auch_df_2018_mean, auch_df_2018_min, auch_df_2018_max = _quartile_summary(auch_df_2018)
auch_df_2019_mean, auch_df_2019_min, auch_df_2019_max = _quartile_summary(auch_df_2019)
auch_df_2020_mean, auch_df_2020_min, auch_df_2020_max = _quartile_summary(auch_df_2020)

# Resample the Chilbolton Observatory data.
chil_df_avg_mean, chil_df_avg_min, chil_df_avg_max = _quartile_summary(chil_df_avg)
chil_df_2016_mean, chil_df_2016_min, chil_df_2016_max = _quartile_summary(chil_df_2016)
chil_df_2017_mean, chil_df_2017_min, chil_df_2017_max = _quartile_summary(chil_df_2017)
chil_df_2018_mean, chil_df_2018_min, chil_df_2018_max = _quartile_summary(chil_df_2018)
chil_df_2019_mean, chil_df_2019_min, chil_df_2019_max = _quartile_summary(chil_df_2019)
chil_df_2020_mean, chil_df_2020_min, chil_df_2020_max = _quartile_summary(chil_df_2020)

## Plot the Auchencorth Moss data.

In [None]:
# Define the variables to plot.
variable_list = ['Gas_Ratio','PM2p5_SO4','PM2p5_NO3','PM2p5_NH4','HNO3','NH3']

# One entry per year: (mean frame, lower band frame, upper band frame, colour, legend label).
yearly_series = [
    (auch_df_2016_mean, auch_df_2016_min, auch_df_2016_max, 'indianred',   '2016'),
    (auch_df_2017_mean, auch_df_2017_min, auch_df_2017_max, 'orange',      '2017'),
    (auch_df_2018_mean, auch_df_2018_min, auch_df_2018_max, 'gold',        '2018'),
    (auch_df_2019_mean, auch_df_2019_min, auch_df_2019_max, 'yellowgreen', '2019'),
    (auch_df_2020_mean, auch_df_2020_min, auch_df_2020_max, 'cadetblue',   '2020'),
]

# Define the axes ticks: the first day of each month of 2020, labelled by month initial.
date_ticks = pd.to_datetime([dt.datetime(2020, month, 1, 0, 0, 0) for month in range(1, 13)])
date_names = ['J','F','M','A','M','J','J','A','S','O','N','D']

# Directory the figures are written to.
plot_directory = './'

# Loop over each variable and plot the graph.
for variable in variable_list:

    # Define the figure.
    fig, ax = plt.subplots()

    # Plot each year's mean line with a shaded band between its lower and upper frames.
    for mean_df, lower_df, upper_df, colour, year_label in yearly_series:
        plt.plot(mean_df.index, mean_df[variable], lw=2, c=colour, label=year_label)
        plt.fill_between(mean_df.index, lower_df[variable], upper_df[variable],
                         color=colour, edgecolor=None, alpha=0.25)

    # Plot a lockdown marker.
    plt.axvline(pd.to_datetime(dt.datetime(2020, 3, 23, 0, 0, 0)), lw=2, ls='dashed', c='black')

    # Apply the axes ticks.
    ax.set_xticks(ticks=date_ticks)
    ax.set_xticklabels(date_names)

    # Define the labels.
    plt.title('Auchencorth Moss')
    plt.ylabel(variable)
    plt.legend()

    # Save the figure.  The suffix is 'All_Years' so the file name matches the
    # equivalent Chilbolton Observatory figure.
    plt.savefig(plot_directory+'Auchencorth_Moss_%s_All_Years.png'%(variable), dpi=150)

In [None]:
# Define the variables to plot.
variable_list = ['Gas_Ratio','PM2p5_SO4','PM2p5_NO3','PM2p5_NH4','HNO3','NH3']

# (mean frame, lower band frame, upper band frame, colour, legend label).
# The multiyear average is built from the 2016-2019 frames, so it is labelled as such.
compared_series = [
    (auch_df_avg_mean,  auch_df_avg_min,  auch_df_avg_max,  'indianred', '2016-2019'),
    (auch_df_2020_mean, auch_df_2020_min, auch_df_2020_max, 'cadetblue', '2020'),
]

# Define the axes ticks: the first day of each month of 2020, labelled by month initial.
date_ticks = pd.to_datetime([dt.datetime(2020, month, 1, 0, 0, 0) for month in range(1, 13)])
date_names = ['J','F','M','A','M','J','J','A','S','O','N','D']

# Directory the figures are written to.
plot_directory = './'

# Loop over each variable and plot the graph.
for variable in variable_list:

    # Define the figure.
    fig, ax = plt.subplots()

    # Plot each mean line with a shaded band between its lower and upper frames.
    for mean_df, lower_df, upper_df, colour, series_label in compared_series:
        plt.plot(mean_df.index, mean_df[variable], lw=2, c=colour, label=series_label)
        plt.fill_between(mean_df.index, lower_df[variable], upper_df[variable],
                         color=colour, edgecolor=None, alpha=0.25)

    # Plot a lockdown marker.
    plt.axvline(pd.to_datetime(dt.datetime(2020, 3, 23, 0, 0, 0)), lw=2, ls='dashed', c='black')

    # Apply the axes ticks.
    ax.set_xticks(ticks=date_ticks)
    ax.set_xticklabels(date_names)

    # Define the labels.
    plt.title('Auchencorth Moss')
    plt.ylabel(variable)
    plt.legend()

    # Save the figure.
    plt.savefig(plot_directory+'Auchencorth_Moss_%s_Average.png'%(variable), dpi=150)

## Plot the Chilbolton Observatory data.

In [None]:
# Define the variables to plot.
variable_list = ['Gas_Ratio','PM2p5_SO4','PM2p5_NO3','PM2p5_NH4','HNO3','NH3']

# One entry per year: (mean frame, lower band frame, upper band frame, colour, legend label).
yearly_series = [
    (chil_df_2016_mean, chil_df_2016_min, chil_df_2016_max, 'indianred',   '2016'),
    (chil_df_2017_mean, chil_df_2017_min, chil_df_2017_max, 'orange',      '2017'),
    (chil_df_2018_mean, chil_df_2018_min, chil_df_2018_max, 'gold',        '2018'),
    (chil_df_2019_mean, chil_df_2019_min, chil_df_2019_max, 'yellowgreen', '2019'),
    (chil_df_2020_mean, chil_df_2020_min, chil_df_2020_max, 'cadetblue',   '2020'),
]

# Loop over each variable and plot the graph.
for variable in variable_list:

    # Define the figure.
    fig, ax = plt.subplots()

    # Plot each year's mean line with a shaded band between its lower and upper frames.
    for mean_df, lower_df, upper_df, colour, year_label in yearly_series:
        plt.plot(mean_df.index, mean_df[variable], lw=2, c=colour, label=year_label)
        plt.fill_between(mean_df.index, lower_df[variable], upper_df[variable],
                         color=colour, edgecolor=None, alpha=0.25)

    # Plot a lockdown marker.
    plt.axvline(pd.to_datetime(dt.datetime(2020, 3, 23, 0, 0, 0)), lw=2, ls='dashed', c='black')

    # Define the axes ticks: the first day of each month of 2020, labelled by month initial.
    date_ticks = pd.to_datetime([dt.datetime(2020, month, 1, 0, 0, 0) for month in range(1, 13)])
    date_names = ['J','F','M','A','M','J','J','A','S','O','N','D']
    ax.set_xticks(ticks=date_ticks)
    ax.set_xticklabels(date_names)

    # Define the labels.
    plt.title('Chilbolton Observatory')
    plt.ylabel(variable)
    plt.legend()

    # Save the figure.
    plot_directory = './'
    plt.savefig(plot_directory+'Chilbolton_Observatory_%s_All_Years.png'%(variable), dpi=150)

In [None]:
# Define the variables to plot.
variable_list = ['Gas_Ratio','PM2p5_SO4','PM2p5_NO3','PM2p5_NH4','HNO3','NH3']

# (mean frame, lower band frame, upper band frame, colour, legend label).
# The multiyear average is built from the 2016-2019 frames, so it is labelled as such.
compared_series = [
    (chil_df_avg_mean,  chil_df_avg_min,  chil_df_avg_max,  'indianred', '2016-2019'),
    (chil_df_2020_mean, chil_df_2020_min, chil_df_2020_max, 'cadetblue', '2020'),
]

# Define the axes ticks: the first day of each month of 2020, labelled by month initial.
date_ticks = pd.to_datetime([dt.datetime(2020, month, 1, 0, 0, 0) for month in range(1, 13)])
date_names = ['J','F','M','A','M','J','J','A','S','O','N','D']

# Directory the figures are written to.
plot_directory = './'

# Loop over each variable and plot the graph.
for variable in variable_list:

    # Define the figure.
    fig, ax = plt.subplots()

    # Plot each mean line with a shaded band between its lower and upper frames.
    for mean_df, lower_df, upper_df, colour, series_label in compared_series:
        plt.plot(mean_df.index, mean_df[variable], lw=2, c=colour, label=series_label)
        plt.fill_between(mean_df.index, lower_df[variable], upper_df[variable],
                         color=colour, edgecolor=None, alpha=0.25)

    # Plot a lockdown marker.
    plt.axvline(pd.to_datetime(dt.datetime(2020, 3, 23, 0, 0, 0)), lw=2, ls='dashed', c='black')

    # Apply the axes ticks.
    ax.set_xticks(ticks=date_ticks)
    ax.set_xticklabels(date_names)

    # Define the labels.
    plt.title('Chilbolton Observatory')
    plt.ylabel(variable)
    plt.legend()

    # Save the figure.
    plt.savefig(plot_directory+'Chilbolton_Observatory_%s_Average.png'%(variable), dpi=150)