In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read in historical observations for max temp and min temp for Dublin Airport, Cork Airport, Belmullet, Valentia Observatory, Malin Head and Mullingar.

In [34]:
# Station CSV files. The position of each file in this list matters: later
# cells pick stations out of `data` by list index.
file_paths = [
    '../Data/MetEireann/dly532.csv',
    '../Data/MetEireann/dly3904.csv',
    '../Data/MetEireann/dly2375.csv',
    '../Data/MetEireann/dly2275.csv',
    '../Data/MetEireann/dly1575.csv',
    '../Data/MetEireann/dly875.csv',
]

# Names given to the three columns kept from each file (see `usecols` below).
col_names = ["Date", "Mxt2m", "Mnt2m"]

# One DataFrame per station, in the same order as `file_paths`.
data = [
    pd.read_csv(
        station_csv,
        skiprows=26,        # NOTE(review): presumably skips the file preamble/header rows - confirm against the raw files
        usecols=[0, 2, 4],  # keep only columns 0, 2 and 4; they are renamed via col_names
        names=col_names,
        parse_dates=True,   # parse the index column (Date) as datetimes
        index_col=0,
        na_values=' ',      # a lone space is also read as a missing value
    )
    for station_csv in file_paths
]

Slice the data to extract the period 1981-2010, as this is the 30-year period used to determine the climatological predictions.

In [35]:
# Restrict every station to the 1981-2010 window. Station keys are paired with
# the frames in `data` purely by position, so the order of this tuple must
# match the order in which the station files were read.
station_keys = ('DUB', 'CORK', 'BEL', 'VAL', 'MAL', 'MUL')

# Label-based row slice on the date index, from '1981' through '2010'.
baseline_years = slice('1981', '2010')

CLM_data = {
    station: data[position][baseline_years]
    for position, station in enumerate(station_keys)
}

Create a 6x12 list ``CLM_t2m``, which is composed of 12 values for 6 locations, the average temperature for each month over the period 1981-2010.

This is obtained by obtaining the average maximum temperature and average minimum temperature for each month and averaging those two values.

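The ``.groupby()`` function in pandas allows the mean monthly figures to be calculated. Looping through each ``key in CLM_data`` gives a 6x12 array, where the 6 rows represent DUB, CORK, BEL, VAL, MAL & MUL *in that order*. The array is then repeated six times along the month axis with ``np.tile``, so the stored array has 6 rows and 6x12 = 72 columns.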

In [44]:
CLM_t2m = []

for key in CLM_data:
    Mxt2m_monthly = CLM_data[key].Mxt2m.groupby(CLM_data[key].index.month).mean()
    Mnt2m_monthly = CLM_data[key].Mnt2m.groupby(CLM_data[key].index.month).mean()
    CLM_t2m.append((Mxt2m_monthly.values+Mnt2m_monthly.values)/2)
    
CLM_t2m = np.tile(CLM_t2m, (1,6))

%store CLM_t2m

Stored 'CLM_t2m' (ndarray)


Repeating the same process, except this time instead of finding the mean, we find the boundaries of the lower tercile and upper tercile for each location. These will be required for computing the Relative Operating Characteristic (ROC) and Brier Score later.

Finally, these arrays are stored using ``%store`` so that they can be used to compute skill scores in other notebooks.

In [53]:
CLM_t2m_lowertercile = []

for key in CLM_data:
    Mxt2m_monthly = CLM_data[key].Mxt2m.groupby(CLM_data[key].index.month).quantile(0.33)
    Mnt2m_monthly = CLM_data[key].Mnt2m.groupby(CLM_data[key].index.month).quantile(0.33)
    CLM_t2m_lowertercile.append((Mxt2m_monthly.values+Mnt2m_monthly.values)/2)
    
CLM_t2m_lowertercile = np.tile(CLM_t2m_lowertercile, (1,6))

%store CLM_t2m_lowertercile

Stored 'CLM_t2m_lowertercile' (ndarray)


In [54]:
CLM_t2m_uppertercile = []

for key in CLM_data:
    Mxt2m_monthly = CLM_data[key].Mxt2m.groupby(CLM_data[key].index.month).quantile(0.67)
    Mnt2m_monthly = CLM_data[key].Mnt2m.groupby(CLM_data[key].index.month).quantile(0.67)
    CLM_t2m_uppertercile.append((Mxt2m_monthly.values+Mnt2m_monthly.values)/2)
    
CLM_t2m_uppertercile = np.tile(CLM_t2m_uppertercile, (1,6))

%store CLM_t2m_uppertercile

Stored 'CLM_t2m_uppertercile' (ndarray)
