In [None]:
import xarray as xr 
import os
import sys
import datetime 
import pandas as pd 
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

# Make the local package importable: the directory two levels above the current
# working directory is appended to the Python path unless it is already listed.
package_root = os.path.dirname(os.path.dirname(os.getcwd()))
if package_root not in sys.path:
    sys.path.append(package_root)
    
from constants import POLLUTANTS, DATA_DIR_CAMS

In [None]:
def date_str_to_date(x):
    """
    Parse a datestring from the O3 csv into a datetime object.

    Every occurrence of the '+AC0' artefact is stripped from the string first;
    what remains is parsed as day-month-year ('%d-%m-%Y').
    """
    cleaned = x.replace('+AC0', '')
    parsed = datetime.datetime.strptime(cleaned, '%d-%m-%Y')
    return parsed

In [None]:
# Coordinates of the Mace Head location
mace_head_loc = dict(latitude=53.326130, longitude=-9.900343)

# Input files: the ground observations csv and the CAMS file for ozone
ground_path = Path("data/mace_head/O3/MH_O3_2018.csv")
cams_path = Path(DATA_DIR_CAMS) / f"{POLLUTANTS['O3']['CAMS']}.nc"


# First Step
# Preprocessing Mace Head data
O3_ground = pd.read_csv(ground_path)
O3_ground = O3_ground.rename(columns=lambda x: x.strip())  # remove whitespace around column titles

# Convert datestrings into datetime objects.
# Series.apply has no axis argument: a stray positional `1` here would be bound to the
# deprecated `convert_dtype` parameter, so only the function is passed.
O3_ground['Date'] = O3_ground['Date'].apply(date_str_to_date)

# Join all the hourly observations into a single column.
# The table is wide (one row per day, one column per hour label such as '01:00' ... '24:00');
# every column after the first is treated as an hour label.
hour_columns = O3_ground.columns.to_list()[1:]

# Offset of each hour column from midnight. '24:00' becomes +24h, i.e. 00:00 of the next day.
hour_offsets = pd.to_timedelta([int(hour[:2]) for hour in hour_columns], unit='h')

# Broadcast (n_days, 1) + (1, n_hours) and flatten row by row, so the observations stay in
# the same day-then-hour order a nested loop over rows and columns would produce.
days = pd.to_datetime(O3_ground['Date']).to_numpy()
times = (days[:, None] + hour_offsets.to_numpy()[None, :]).ravel()
values = O3_ground[hour_columns].to_numpy().ravel()

# Build the long table in one go instead of appending row by row (which is quadratic).
df = pd.DataFrame({'time': times, 'O3': values})

# set the time as index, as this will be used for joining the dataframes later
O3_ground = df.set_index('time')

# Second Step
# Preprocessing the CAMS observations

cams_obs = xr.open_dataset(cams_path)
# Select the CAMS observations for ozone at the grid point closest to the Mace Head location.
# NOTE(review): assumes the dataset's longitude coordinate uses the same convention as
# mace_head_loc (negative values west of Greenwich) — confirm, otherwise 'nearest' picks
# the wrong grid cell.
O3_cams = cams_obs.o3_conc.sel(
    latitude=mace_head_loc['latitude'],
    longitude=mace_head_loc['longitude'],
    level=0,
    method='nearest')

# Convert the xarray object into a pandas dataframe with a single 'O3 cams' column.
# The Series is renamed explicitly before building the frame: pd.DataFrame(series, columns=[...])
# selects by label when the Series carries a name, which yields an all-NaN column if
# to_pandas() returns a Series named after the variable (depends on the xarray version).
O3_cams = O3_cams.to_pandas().rename('O3 cams').to_frame()


# Third step
# Join the O3 observations from CAMS and Mace Head into a single pandas dataframe.
# Both frames are joined on their index; only timestamps present in both are kept.

O3 = O3_ground.rename(columns={'O3':'O3 ground'}).merge(O3_cams, left_index=True, right_index=True)

# Remove empty cells, indicating NA. Any whitespace-only (or empty) value counts as empty,
# not just a run of exactly six spaces, so the float conversion below cannot trip on them.
is_blank = O3['O3 ground'].astype(str).str.strip() == ''
# Take an explicit copy so the column assignments that follow operate on an independent
# frame rather than on a slice (avoids SettingWithCopyWarning).
O3 = O3.loc[~is_blank].copy()
O3['O3 ground'] = O3['O3 ground'].astype('float')  # convert column from string to float

In [None]:
# Create a fresh figure and axes at 2.5x the default figure size
Size = plt.rcParams['figure.figsize']
F, ax = plt.subplots(figsize=(Size[0]*2.5, Size[1]*2.5))

# Calculate the moving average over one week (168 hours),
# as it makes it easier to interpret a dataset with many observations.
# NOTE: an integer window counts rows, not elapsed time, so it spans exactly one week
# only where the hourly series has no gaps.
ma_window = 168
O3['O3 MA ground'] = O3['O3 ground'].rolling(window=ma_window).mean()
O3['O3 MA cams'] = O3['O3 cams'].rolling(window=ma_window).mean()

# Expose the time index as a column once and draw both series on the same axes
O3_plot = O3.reset_index()
O3_plot.plot(kind='line', x='time', y='O3 MA ground', ax=ax)
O3_plot.plot(kind='line', x='time', y='O3 MA cams', color='red', ax=ax)

ax.set_xlabel('Time')  # Add an x-label to the axes.
ax.set_ylabel('O3 µg m-3')  # Add a y-label to the axes.
ax.set_title("Ground vs Cams Ozone Concentrations 2018 Mace Head 1 Week Moving Average")  # Add a title to the axes.
ax.legend()  # Add a legend.
plt.show()  # must be called; a bare `plt.show` only references the function

In [None]:
# Draw one histogram per column of the joined dataframe to assess how the values are distributed
n_bins = 50
O3.hist(bins=n_bins)

In [None]:
# Correlation between ground and CAMS ozone, for the original series and for the moving averages.
# Rows with any missing value (e.g. the start of the moving-average window) are dropped first.
O3 = O3.dropna()

ground_raw, cams_raw = O3['O3 ground'], O3['O3 cams']
ground_ma, cams_ma = O3['O3 MA ground'], O3['O3 MA cams']

# Pearson: each result is indexed as [0] coefficient, [1] p-value
p_MA = pearsonr(ground_ma, cams_ma)
p_O3 = pearsonr(ground_raw, cams_raw)

# Spearman: same indexing as above
s_MA = spearmanr(ground_ma, cams_ma)
s_O3 = spearmanr(ground_raw, cams_raw)

print(
    f"""
    Pearson R: {p_O3[0]} - p-value: {p_O3[1]} \n
    Pearson R Moving Average: {p_MA[0]} - p-value: {p_MA[1]} \n
    Spearman R: {s_O3[0]} - p-value: {s_O3[1]} \n
    Spearman R Moving Average: {s_MA[0]} - p-value: {s_MA[1]} \n
    """
)