In [None]:
import xarray as xr 
import os
import sys
import datetime 
import pandas as pd 
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

# Put the directory two levels above the current working directory on sys.path
# (unless it is already there) so that the local imports resolve.
package_root = os.path.dirname(os.path.dirname(os.getcwd()))
if package_root not in sys.path:
    sys.path.append(package_root)
    
from constants import POLLUTANTS, DATA_DIR_CAMS

In [None]:
def convert_string_to_datetime(x):
    """
    Parse a timestamp string from the CO csv into a ``datetime.datetime``.

    The string has to follow the month/day/year hour:minute layout
    (``'%m/%d/%Y %H:%M'``), for example ``'01/31/2018 13:40'``;
    ``strptime`` raises ``ValueError`` for anything else.
    """
    timestamp_format = '%m/%d/%Y %H:%M'
    parsed = datetime.datetime.strptime(x, timestamp_format)
    return parsed

In [None]:
# Latitude / longitude of the Mace Head station, used to pick the matching grid cell
mace_head_loc = dict(
    latitude=53.326130,
    longitude=-9.900343,
)

# Input files: the 2018 Mace Head CO csv and the CAMS netCDF file for CO
ground_path = Path("data/mace_head/CO/MHD-gcmd_2018.csv")
cams_path = Path(DATA_DIR_CAMS) / f"{POLLUTANTS['CO']['CAMS']}.nc"

# First Step
# Preprocessing Mace Head data
#
# The clean-up is written as a single method chain: every step returns a new
# frame, so no column is ever assigned on a slice of another frame (assigning
# to a column right after a `[[...]]` or boolean-mask selection triggers
# pandas' SettingWithCopyWarning).
CO_ground = (
    pd.read_csv(ground_path)  # read csv with Mace Head ground data
    .rename(columns=lambda name: name.strip())  # remove the whitespace around the column names
    .rename(columns={'mm/dd/yyyy hh:mm': 'time'})  # rename the timestamp column
    .loc[:, ['time', 'CO']]  # keep only the date&time and carbon monoxide columns
    .assign(
        # parse the timestamp strings, then round to the nearest full hour
        # (measurements are taken every 40 mins)
        time=lambda df: df['time'].apply(convert_string_to_datetime).dt.round('60min')
    )
    .loc[lambda df: df['CO'] != ' ']  # drop rows with no data value (a single blank)
    .assign(CO=lambda df: df['CO'].astype('float'))  # convert from string to float
    .groupby('time')
    .mean()  # calculate the average value per hour
)

# Convert the mixing ratio to a mass concentration.
# NOTE(review): 1.145 ≈ 28.01 / 24.45, which is the CO factor for ppb -> µg m-3
# (equivalently ppm -> mg m-3) at 25 °C and 1 atm. This step used to be
# labelled "ppm to µg m-3"; confirm the unit of the csv 'CO' column.
CO_ground['CO'] = CO_ground['CO'] * 1.145

# Second Step
# Preprocessing the CAMS observations

cams_obs = xr.open_dataset(cams_path)

# select the CAMS observations for carbon monoxide closest to the Mace Head location
# NOTE(review): method='nearest' never fails, so if the file stores longitudes
# on a 0-360 grid the negative longitude would silently select an edge cell.
# Confirm the longitude convention of the CAMS file.
CO_cams = cams_obs.co_conc.sel(
    latitude=mace_head_loc['latitude'],
    longitude=mace_head_loc['longitude'],
    level=0,
    method='nearest')

# Convert the xarray object into a one-column pandas dataframe.
# Assumes the selection above leaves a single dimension, so that to_pandas()
# returns a Series (TODO confirm against the file's dimensions).
# The column is named through `to_frame(name=...)` rather than
# `pd.DataFrame(series, columns=[...])`: when the Series already carries a name
# (xarray releases that propagate the variable name in to_pandas()), the
# DataFrame constructor treats `columns` as a selection by name and returns an
# empty 'CO cams' frame instead of a renamed column.
CO_cams = CO_cams.to_pandas().to_frame(name='CO cams')


# Third step
# Combine both sources: join the Mace Head and CAMS frames on their indexes,
# keeping only the index values present in both (merge's default inner join)
ground_renamed = CO_ground.rename(columns={'CO': 'CO ground'})
CO = pd.merge(ground_renamed, CO_cams, left_index=True, right_index=True)

In [None]:
# Create a fresh figure/axes pair for this cell. Using plt.subplots() instead of
# plt.gcf()/plt.gca() means re-running the cell never reuses (and re-scales) a
# figure that is still current from an earlier run.
F, ax = plt.subplots()  # matplotlib figure and axes
Size = F.get_size_inches()
F.set_size_inches(Size[0]*2.5, Size[1]*2.5, forward=True)  # enlarge the default figure 2.5x

# Calculate the moving average over one week (168 hours),
# as it makes it easier to interpret a dataset with many observations.
# window=168 counts rows, so it spans exactly one week only where the merged
# frame has one row per hour without gaps; the first 167 rows are NaN.
CO['CO MA ground'] = CO['CO ground'].rolling(window=168).mean()
CO['CO MA cams'] = CO['CO cams'].rolling(window=168).mean()

# Move the index into a regular 'time' column once and draw both series on the same axes
CO_by_time = CO.reset_index()
CO_by_time.plot(kind='line', x='time', y='CO MA ground', ax=ax)
CO_by_time.plot(kind='line', x='time', y='CO MA cams', color='red', ax=ax)

ax.set_xlabel('Time')  # Add an x-label to the axes.
ax.set_ylabel('CO µg m-3')  # Add a y-label to the axes.
ax.set_title("Ground vs Cams Carbon Monoxide Concentrations 2018 Mace Head 1 Week Moving Average")  # Add a title to the axes.
ax.legend()  # Add a legend.
plt.show()  # the call parentheses are required; a bare `plt.show` does nothing

In [None]:
# Distribution check: draw a 50-bin histogram for each column of the merged frame
hist_options = {'bins': 50}
CO.hist(**hist_options)

In [None]:
# Correlation between the ground and CAMS values, computed for the original
# series and for their moving averages

# Keep only complete rows. This also removes the rows where the moving averages
# are still NaN, so all four coefficients below are computed on the same rows.
CO = CO.dropna()

raw_pair = (CO['CO ground'], CO['CO cams'])
ma_pair = (CO['CO MA ground'], CO['CO MA cams'])

# Pearson correlation; each result is indexed as [0] = coefficient, [1] = p-value
p_MA = pearsonr(*ma_pair)
p_CO = pearsonr(*raw_pair)

# Spearman rank correlation, indexed the same way
s_MA = spearmanr(*ma_pair)
s_CO = spearmanr(*raw_pair)

print(
    f"""
    Pearson R: {p_CO[0]} - p-value: {p_CO[1]} \n
    Pearson R Moving Average: {p_MA[0]} - p-value: {p_MA[1]} \n
    Spearman R: {s_CO[0]} - p-value: {s_CO[1]} \n
    Spearman R Moving Average: {s_MA[0]} - p-value: {s_MA[1]} \n
    """
)