In [None]:
import xarray as xr 
import os
import sys
import datetime 
import pandas as pd 
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

# Put the directory two levels above the current working directory on the
# import path (once), so that the local `constants` import below resolves.
_package_root = os.path.dirname(os.path.dirname(os.getcwd()))
if _package_root not in sys.path:
    sys.path.append(_package_root)
    
from constants import POLLUTANTS, DATA_DIR_CAMS

In [None]:
# Reference instant that all offsets are counted from: 1 January 2019, 00:00
start_date = datetime.datetime(2019, 1, 1)

def convert_string_to_datetime(x):
    """
    Turn a day offset into a datetime object.

    `x` is multiplied by 24 and rounded to a whole number of hours; that many
    hours are then added to the module-level `start_date`.
    """
    whole_hours = round(x * 24)
    elapsed = datetime.timedelta(hours=whole_hours)
    return start_date + elapsed

In [None]:
# Coordinates of the Mace Head location, used to select the nearest CAMS grid cell
mace_head_loc = {
    'latitude': 53.326130,
    'longitude': -9.900343,
}

ground_path = Path("data/mace_head/PM25/pm25_2019.csv")
cams_path = Path(DATA_DIR_CAMS).joinpath(f"{POLLUTANTS['PM25']['CAMS']}.nc")

# --- Preprocessing Mace Head (ground) data ---

PM25_ground = pd.read_csv(ground_path)  # read csv as pandas dataframe
# convert the numeric `start_time` stamps to datetime objects
PM25_ground['time'] = PM25_ground['start_time'].apply(convert_string_to_datetime)
# keep only the datetime and the PM2.5 measurements, and index by datetime:
# the index is what the join with the CAMS series aligns on further down
PM25_ground = PM25_ground[['time', 'pm25']].set_index('time')

# --- Preprocessing the CAMS data ---

cams_obs = xr.open_dataset(cams_path)  # open the CAMS file as an xarray dataset
# select the CAMS PM2.5 values (level 0) for the grid cell closest to the Mace Head location
PM25_cams = cams_obs.pm2p5_conc.sel(
    latitude=mace_head_loc['latitude'],
    longitude=mace_head_loc['longitude'],
    level=0,
    method='nearest')

# convert the xarray object into a single-column pandas dataframe
PM25_cams = pd.DataFrame(data=PM25_cams.to_pandas(), columns=['PM25 cams'])

# --- Join the PM2.5 values from CAMS and Mace Head into a single pandas dataframe ---

# right join: every CAMS timestamp is kept, timestamps without a ground value get NaN
PM25 = PM25_ground.rename(columns={'pm25': 'PM25 ground'}).join(PM25_cams, how='right')
PM25['PM25 ground'] = PM25['PM25 ground'].astype('float')  # make sure the column is numeric
PM25 = PM25.replace(999.99, np.nan)  # set the 999.99 values as NoData

# As there is a considerable data gap during the first 6 months of the year,
# only the last 6 months are used to calculate the correlation coefficient.
# The trailing .copy() makes the result an independent frame, so that columns
# can be added to it in later cells without a SettingWithCopyWarning.
PM25 = PM25[PM25.index > datetime.datetime(year=2019, month=6, day=30, hour=23, minute=59)].copy()

In [None]:
# Create a dedicated figure/axes pair instead of grabbing the "current" ones,
# so re-running this cell never rescales or overplots an existing figure.
F, ax = plt.subplots()
Size = F.get_size_inches()
F.set_size_inches(Size[0]*2.5, Size[1]*2.5, forward=True)  # enlarge the default figure size 2.5x

# Drop the rows without a ground measurement first, so that both moving
# averages below are computed over exactly the same timestamps.
PM25 = PM25.dropna(subset=['PM25 ground'])

# Moving average over 168 consecutive paired hourly rows (one week when there
# are no gaps); smoothing makes a dataset with many observations easier to read.
# .assign() returns a new frame, which avoids chained-assignment warnings.
PM25 = PM25.assign(**{
    'PM25 MA ground': PM25['PM25 ground'].rolling(window=168).mean(),
    'PM25 MA cams': PM25['PM25 cams'].rolling(window=168).mean(),
})

# Plot both smoothed series against time on the same axes
PM25_plot = PM25.reset_index()
PM25_plot.plot(kind='line', x='time', y='PM25 MA ground', ax=ax)
PM25_plot.plot(kind='line', x='time', y='PM25 MA cams', color='red', ax=ax)

ax.set_xlabel('Time')  # Add an x-label to the axes.
# micro sign written as an escape so the label survives file re-encoding
ax.set_ylabel('PM 25 \u00b5g m-3')  # Add a y-label to the axes.
ax.set_title("Ground vs Cams PM 2.5 Concentrations 2019 Mace Head 1 Week Moving Average")  # Add a title to the axes.
ax.legend()  # Add a legend.

In [None]:
# Draw a histogram of every column in the joined frame to get a feel for
# how the values are distributed
n_bins = 50
PM25.hist(bins=n_bins)

In [None]:
# Correlate the ground and CAMS series twice: on the original values and on
# their moving averages, each with both the Pearson and Spearman coefficient.

# keep only the rows in which every column has a value
PM25 = PM25.dropna()

raw_pair = (PM25['PM25 ground'], PM25['PM25 cams'])
smoothed_pair = (PM25['PM25 MA ground'], PM25['PM25 MA cams'])

# each result is read below as [0] = coefficient, [1] = p-value
p_MA, p_PM25 = pearsonr(*smoothed_pair), pearsonr(*raw_pair)
s_MA, s_PM25 = spearmanr(*smoothed_pair), spearmanr(*raw_pair)

summary = f"""
    Pearson R: {p_PM25[0]} - p-value: {p_PM25[1]} \n
    Pearson R Moving Average: {p_MA[0]} - p-value: {p_MA[1]} \n
    Spearman R: {s_PM25[0]} - p-value: {s_PM25[1]} \n
    Spearman R Moving Average: {s_MA[0]} - p-value: {s_MA[1]} \n
    """
print(summary)