In [None]:
import xarray as xr 
import os
import sys
import datetime 
import pandas as pd 
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

# Make the local package importable: the directory two levels above the
# current working directory is put on the Python path (once only).
_package_root = os.path.dirname(os.path.dirname(os.getcwd()))
if _package_root not in sys.path:
    sys.path.append(_package_root)
    
from constants import POLLUTANTS, DATA_DIR_CAMS

In [None]:
# Reference instant that every converted offset is measured from.
start_date = datetime.datetime(2019, 1, 1)


def convert_string_to_datetime(x):
    """
    Turn a numeric offset into a datetime relative to ``start_date``.

    The offset is multiplied by 24 and rounded to a whole number of hours,
    which is then added to ``start_date``. Presumably ``x`` is a fractional
    day count since 2019-01-01 (despite "string" in the name) - TODO confirm
    against the ``start_time`` column of the ground-station CSV.
    """
    whole_hours = round(24 * x)
    return start_date + datetime.timedelta(hours=whole_hours)

In [None]:
# Coordinates of the Mace Head location, used to pick the matching CAMS grid cell
mace_head_loc = dict(latitude=53.326130, longitude=-9.900343)

# Ground-station CSV (path relative to the working directory) and the CAMS
# NetCDF file, whose name is looked up in the project's POLLUTANTS table
ground_path = Path("data/mace_head/PM10/pm10_2019.csv")
cams_path = Path(DATA_DIR_CAMS) / f"{POLLUTANTS['PM10']['CAMS']}.nc"

# Preprocessing Mace Head data:
# read the CSV, derive a datetime column from the 'start_time' stamps,
# keep only the PM10 measurements and index them by that datetime
# (the index is what the CAMS series is joined on later).
PM10_ground = (
    pd.read_csv(ground_path)
    .assign(time=lambda frame: frame['start_time'].apply(convert_string_to_datetime))
    [['time', 'pm10']]
    .set_index('time')
)


# Preprocessing the CAMS observations
cams_obs = xr.open_dataset(cams_path)

# Pick the PM10 concentrations of the grid cell closest to the Mace Head location.
# method='nearest' applies to every indexer, so level=0 also resolves to the
# closest available level.
# NOTE(review): assumes the CAMS longitude coordinate uses the same signed
# (-180..180) convention as mace_head_loc - confirm against the file.
PM10_cams = cams_obs['pm10_conc'].sel(
    {
        'latitude': mace_head_loc['latitude'],
        'longitude': mace_head_loc['longitude'],
        'level': 0,
    },
    method='nearest',
)

# Wrap the selection in a single-column pandas dataframe
# (presumably a time-indexed series once the spatial dimensions are gone - TODO confirm)
PM10_cams = pd.DataFrame(PM10_cams.to_pandas(), columns=['PM10 cams'])


# Join the PM10 observations from CAMS and Mace Head into a single pandas dataframe.
# how='right' keeps every CAMS timestamp; timestamps without a ground
# measurement get NaN in the 'PM10 ground' column.
PM10 = PM10_ground.rename(columns={'pm10': 'PM10 ground'}).join(PM10_cams, how='right')

# Convert the ground readings to float and set the 999.99 values as NoData.
# Only the ground column is touched, so CAMS values are never altered.
PM10['PM10 ground'] = PM10['PM10 ground'].astype('float').replace(999.99, np.nan)

# As there is a considerable data gap during the first 6 months of the year,
# only the last 6 months are used to calculate the correlation coefficient.
# The copy is taken AFTER filtering so the result is an independent frame:
# adding columns to it later does not raise SettingWithCopyWarning.
PM10 = PM10[PM10.index > datetime.datetime(year=2019, month=6, day=30, hour=23, minute=59)].copy()

In [None]:
# Current matplotlib figure and axes; the figure is enlarged 2.5x in both directions
F = plt.gcf()
ax = plt.gca()
Size = F.get_size_inches()
F.set_size_inches(2.5 * Size[0], 2.5 * Size[1], forward=True)

# Smooth both series with a 168-sample rolling mean to make the long record
# easier to read. Missing values are dropped first, so the window counts
# observations: it equals one week only for gap-free hourly data (assumed - confirm).
PM10['PM10 MA ground'] = (
    PM10['PM10 ground']
    .dropna()
    .rolling(window=168)
    .mean()
)
PM10['PM10 MA cams'] = (
    PM10['PM10 cams']
    .dropna()
    .rolling(window=168)
    .mean()
)

# Keep only the rows that have a ground measurement
PM10 = PM10.dropna(subset=['PM10 ground'])

# Draw both moving averages against time on the shared axes
PM10.reset_index().plot(x='time', y='PM10 MA ground', kind='line', ax=ax)
PM10.reset_index().plot(x='time', y='PM10 MA cams', kind='line', color='red', ax=ax)

# Axis labels, title and legend
ax.set_xlabel('Time')
ax.set_ylabel('PM 10 µg m-3')
ax.set_title("Ground vs Cams PM 10 Concentrations 2019 Mace Head 1 Week Moving Average")
ax.legend()

In [None]:
# Histogram (50 bins) of every column, using only the rows that are complete
# in all columns, to assess how the values are distributed
PM10.dropna(axis=0, how='any').hist(bins=50)

In [None]:
# Correlation between ground and CAMS PM10, for the original series and for
# the moving averages, using both Pearson and Spearman coefficients.
# Rows with any missing value are removed first so all four inputs line up.
# NOTE(review): the moving averages are built from overlapping windows, so
# their p-values likely overstate significance - interpret with care.

PM10 = PM10.dropna()

# Each result is indexed below as [0] = coefficient, [1] = p-value
p_PM10 = pearsonr(PM10['PM10 ground'], PM10['PM10 cams'])
p_MA = pearsonr(PM10['PM10 MA ground'], PM10['PM10 MA cams'])

s_PM10 = spearmanr(PM10['PM10 ground'], PM10['PM10 cams'])
s_MA = spearmanr(PM10['PM10 MA ground'], PM10['PM10 MA cams'])

print(
    f"""
    Pearson R: {p_PM10[0]} - p-value: {p_PM10[1]} \n
    Pearson R Moving Average: {p_MA[0]} - p-value: {p_MA[1]} \n
    Spearman R: {s_PM10[0]} - p-value: {s_PM10[1]} \n
    Spearman R Moving Average: {s_MA[0]} - p-value: {s_MA[1]} \n
    """
)