# Create Dataframe for solar PV verification

Import libraries

In [1]:
import numpy as np                      # Data
import pandas as pd                     # Data 
import geopandas as gpd                 # Data
import xarray as xr                     # Data
import atlite                           # Model
import matplotlib.pyplot as plt         # Plot
from matplotlib.lines import Line2D     # Plot
from tqdm import tqdm                   # Visualise progression in loop
import yaml                             # Open yaml files

## EirGrid

### Locations of farms

Read csv file located in /Data_Final folder

In [2]:
# Installed-capacity table: the CSV column at position 2 becomes the index
# and is parsed as dates.
df_capacity = pd.read_csv(
    '../Data_Final/EirGrid/capacity_pv_1424_eir.csv',
    index_col=2,
    parse_dates=True,
)

FileNotFoundError: [Errno 2] No such file or directory: '../Data_Final/EirGrid/capacity_pv_1424_eir.csv'

Rename the columns to be correctly read by Atlite

In [None]:
# Drop incomplete rows, then use the column names Atlite reads:
# 'capacity', 'y' (latitude) and 'x' (longitude).
df_capacity = (
    df_capacity
    .dropna()
    .rename(columns={'Capacity (MW)':'capacity', 'latitude':'y', 'longitude':'x'})
)
# Keep only the rows whose index (date) is before 2024.
df_capacity = df_capacity.loc[df_capacity.index < '2024']

We override the connection dates of the large capacities installed in 2023. Their installation dates were estimated from the EirGrid data and from simulations using this capacity. Namely:

- First two IC installations: 2023-04-01 -> "Rosspile" (95 MW) and "Gillinstown" (95 MW)
- Third IC installation: 2023-04-26 -> "Gallanstown" (119 MW)
- Fourth IC installation: 2023-06-12 -> "Blundelstown" (60 MW)

In [None]:
# Estimated connection dates, keyed by farm name.
new_connection_dates = {
    farm_name: pd.to_datetime(date_string)
    for farm_name, date_string in (
        ('Rosspile', '2023-04-01'),
        ('Gillinstown', '2023-04-01'),
        ('Gallanstown', '2023-04-26'),
        ('Blundelstown', '2023-06-12'),
    )
}

In [None]:
# Give each listed farm its estimated connection date.
# Rows are selected by farm name. Relabelling the old index value with
# `rename(index=...)` would also move every other farm that happens to share
# the same original connection date. Every row whose 'Name' is listed is
# updated; all other rows keep their date.
index_name = df_capacity.index.name
updated_dates = [
    new_connection_dates.get(name, old_date)
    for name, old_date in zip(df_capacity['Name'], df_capacity.index)
]

# Rebuild the index as datetimes, keeping the original index name so that
# `reset_index()` still yields the same column name.
df_capacity.index = pd.to_datetime(pd.Index(updated_dates, name=index_name))

## Atlite

### Create Cutout for Ireland

Load weather files downloaded on Copernicus website

In [None]:
# Load ERA5 files located in local folders
# ERA5 inputs read from the local data folder, one NetCDF file per group.
ds_temperature = xr.open_dataset(
    '../Data/ERA5/ERA5_2m_temperature_soil_temperature_level_4_hourly_2018_2023.nc'
)
ds_radiation_sfc = xr.open_dataset(
    '../Data/ERA5/ERA5_surface_net_solar_radiation_surface_solar_radiation_downwards_hourly_2018_2023.nc'
)
ds_radiation_toa = xr.open_dataset(
    '../Data/ERA5/ERA5_toa_incident_solar_radiation_total_sky_direct_solar_radiation_at_surface_hourly_2018_2023.nc'
)

Some variables need to be formatted: the `expver` dimension is collapsed with a NaN-aware mean

In [None]:
# Collapse the 'expver' dimension of each dataset with a NaN-ignoring mean,
# keeping the attributes.
ds_temperature, ds_radiation_sfc, ds_radiation_toa = (
    dataset.reduce(np.nanmean, dim='expver', keep_attrs=True)
    for dataset in (ds_temperature, ds_radiation_sfc, ds_radiation_toa)
)

In [None]:
# Combine the three ERA5 datasets and keep the 2018-2023 period.
ds = xr.merge(
    [ds_temperature, ds_radiation_sfc, ds_radiation_toa]
).sel(time=slice("2018-01-01", "2023-12-31"))

Load the climate variables into Atlite

This can only be done via the function get_cutout_from_era5_data which is not originally in the Atlite scripts but has been added by the authors

In [None]:
# Build the cutout from the merged ERA5 dataset for 'influx' and 'temperature'.
# NOTE(review): the first argument is the literal string 'path', presumably
# a placeholder for the cutout path; confirm.
cutout_ie = atlite.cutout.get_cutout_from_era5_data(
    'path',
    ds,
    ['influx', 'temperature'],
)

Define function to create a time dependent layout of the capacity from the dataframe

In [None]:
def get_time_dependent_capacity_distribution(cutout_ie: atlite.Cutout, df_capacity: pd.DataFrame):
    """Build a capacity layout that grows over time as installations connect.

    Parameters
    ----------
    cutout_ie : atlite.Cutout
        Cutout whose ``data['temperature']`` field is copied as the template
        for the result and whose ``layout_from_capacity_list`` converts one
        installation into a layout.
    df_capacity : pd.DataFrame
        One row per installation. Needs a ``capacity`` column and, once the
        index has been reset, a ``Connection date`` column.
        NOTE(review): ``layout_from_capacity_list`` presumably also needs the
        ``x``/``y`` columns; confirm against the atlite documentation.

    Returns
    -------
    The template copy, renamed ``'Capacity'``, holding for each time step the
    sum of the layouts of all installations whose connection date is not
    later than that time step.
    """
    # The temperature field only serves as a template: copy it and zero it.
    layout_over_time = cutout_ie.data['temperature'].copy()
    layout_over_time.name = 'Capacity'
    layout_over_time[:, :, :] = 0.

    installations = df_capacity.reset_index()
    for _, installation in tqdm(installations.iterrows(), total=len(df_capacity)):
        # Layout of this single installation.
        single_layout = cutout_ie.layout_from_capacity_list(
            pd.DataFrame([installation]), col="capacity"
        )

        # Add it to every time step from its connection date onwards.
        connected = layout_over_time['time'] >= installation['Connection date']
        layout_over_time[connected] += single_layout

    return layout_over_time

### Get atlite generation

In [None]:
def get_cf_series_atlite(cutout_ie: atlite.Cutout, capacity_layout, panel, orientation):
    """Return the capacity-weighted spatial mean of the Atlite PV conversion.

    Parameters
    ----------
    cutout_ie : atlite.Cutout
        Cutout whose ``data`` is passed to ``atlite.convert.convert_pv``.
    capacity_layout : xr.DataArray or pd.DataFrame
        Weights for the spatial mean. A DataArray is used directly. A
        DataFrame of installations is first converted with
        ``get_time_dependent_capacity_distribution``.
    panel
        Panel name passed to ``atlite.convert.get_solarpanelconfig``.
    orientation
        Orientation name passed to ``atlite.convert.get_orientation``.

    Returns
    -------
    The ``convert_pv`` output averaged over ``x`` and ``y`` with
    ``capacity_layout`` as weights.

    Raises
    ------
    TypeError
        If ``capacity_layout`` is neither a DataArray nor a DataFrame.
    """
    # Fail early instead of silently returning None for unsupported weights.
    if not isinstance(capacity_layout, (xr.DataArray, pd.DataFrame)):
        raise TypeError(
            "capacity_layout must be an xarray.DataArray or a pandas.DataFrame, "
            f"got {type(capacity_layout).__name__}"
        )

    pv_cf = atlite.convert.convert_pv(
        cutout_ie.data,
        panel=atlite.convert.get_solarpanelconfig(panel),
        orientation=atlite.convert.get_orientation(orientation),
        tracking=None,
    )

    if isinstance(capacity_layout, pd.DataFrame):
        capacity_layout = get_time_dependent_capacity_distribution(cutout_ie, capacity_layout)

    # The mean is taken on the weighted object, not on the weights, so both
    # input types yield the same kind of result.
    return pv_cf.weighted(capacity_layout).mean(('x', 'y'))

### Capacity Factor

Now we calculate the solar PV capacity factor series for both the smoothed and non-smoothed curves. We will compare the two to EirGrid before comparing the different methods to visualise the effective difference this makes.

In [None]:
# Time-dependent capacity layout for all installations.
full_time_layout = get_time_dependent_capacity_distribution(
    cutout_ie=cutout_ie,
    df_capacity=df_capacity,
)

100%|██████████| 31/31 [00:01<00:00, 16.66it/s]


In [None]:
# Atlite capacity-factor series weighted by the time-dependent layout.
cf_atlite = get_cf_series_atlite(
    cutout_ie=cutout_ie,
    capacity_layout=full_time_layout,
    panel='KANENA',
    orientation='latitude_optimal',
)

## C3S-E Gridded

Read CF data from C3S-Energy from 2018 to 2023

In [None]:
# Gridded C3S-E capacity factors for Ireland, restricted to 2018-2023.
da_gridded = xr.open_dataarray(
    '../Data/C3S-E/c3se_solar_capacityfactor_20180101_20231231_gridded_ireland.nc'
).sel(time=slice("2018-01-01", "2023-12-31"))

In [None]:
def get_cf_series_c3se_gridded(da_gridded: xr.DataArray, df_capacity_solar: pd.DataFrame, only_cf: bool = True):
    """Capacity-weighted capacity-factor series from a gridded CF field.

    Each installation takes the series of the grid cell nearest to it. That
    series counts only from the installation's connection date onwards and
    is weighted by the installation's capacity.

    Parameters
    ----------
    da_gridded : xr.DataArray
        Gridded capacity factors with ``time``, ``latitude`` and
        ``longitude`` coordinates. It is indexed positionally as
        ``[all, latitude index, longitude index]``, so the dimensions are
        assumed to be ordered (time, latitude, longitude); confirm on the data.
    df_capacity_solar : pd.DataFrame
        One row per installation with ``x``, ``y`` and ``capacity`` columns
        and, once the index has been reset, a ``Connection date`` column.
    only_cf : bool, default True
        If True return only the capacity-factor series. Otherwise also
        return the capacity-weighted sum and the total connected capacity.

    Returns
    -------
    ``cf_time_series`` or the tuple
    ``(cf_time_series, summed_time_series, total_capacity_time_series)``.
    Time steps with no connected capacity are divided by zero.
    """
    lats = da_gridded['latitude'].values
    lons = da_gridded['longitude'].values

    summed_time_series = np.zeros(da_gridded['time'].shape)
    total_capacity_time_series  = np.zeros(da_gridded['time'].shape)

    # Iterate over all capacity installations
    for _, row in df_capacity_solar.reset_index().iterrows():
        x = row['x']
        y = row['y']
        cap = row['capacity']

        # Find the nearest lat/lon point
        dif_min_lon = np.argmin(abs(lons-x))
        dif_min_lat = np.argmin(abs(lats-y))

        # Work on a copy: integer indexing can return a view, and the masking
        # below would then overwrite `da_gridded`, corrupting the series of
        # any later installation that falls in the same grid cell.
        time_series = da_gridded[:, dif_min_lat, dif_min_lon].copy()

        capacity_time_series = time_series.copy()
        capacity_time_series[:] = cap

        # Nothing is produced or installed before the connection date
        time_series[time_series['time']<row['Connection date']] = 0.
        capacity_time_series[capacity_time_series['time']<row['Connection date']] = 0.

        # Add the CF time series to the total multiplied by the capacity (weight)
        summed_time_series += cap*time_series
        total_capacity_time_series += capacity_time_series

    # Divide the total time series by the total IC to go back to CF
    cf_time_series = summed_time_series/total_capacity_time_series
    if only_cf:
        return cf_time_series
    return cf_time_series, summed_time_series, total_capacity_time_series

In [None]:
# C3S-E gridded capacity factor weighted by the installed capacities.
cf_c3se_gridded = get_cf_series_c3se_gridded(
    da_gridded=da_gridded,
    df_capacity_solar=df_capacity,
    only_cf=True,
)

## C3S-E National

Read CSV file containing Capacity Factor from C3S-E National

In [None]:
# National C3S-E capacity factors: skip the 52 leading rows, keep the
# date column (0) and column 18, and index by parsed dates.
df_nat = pd.read_csv(
    '../Data/C3S-E/c3se_solar_capacityfactor_national.csv',
    skiprows=52,
    usecols=[0, 18],  # Retrieve IE data
    index_col=0,
    parse_dates=True,
)

Read CSV file containing Capacity Factor from C3S-E Subnational

In [None]:
# Subnational C3S-E capacity factors: skip the 52 leading rows, keep the
# date column (0) and column 350, and index by parsed dates.
df_sub = pd.read_csv(
    '../Data/C3S-E/c3se_solar_capacityfactor_subnational.csv',
    skiprows=52,
    usecols=[0, 350],  # Data for NI
    index_col=0,
    parse_dates=True,
)

Select the time period of verification (2018 - 2023)

In [None]:
# Keep the rows dated from 2018 (inclusive) to 2024 (exclusive).
df_nat = df_nat.loc[(df_nat.index >= "2018") & (df_nat.index < "2024")]
df_sub = df_sub.loc[(df_sub.index >= "2018") & (df_sub.index < "2024")]

In [None]:
# One time-dependent capacity layout per region, selected through the
# 'ROI/NI' column ('ROI' first, then 'NI').
capacity_series_ie, capacity_series_ni = (
    get_time_dependent_capacity_distribution(
        cutout_ie=cutout_ie,
        df_capacity=df_capacity[df_capacity['ROI/NI']==region_code],
    )
    for region_code in ('ROI', 'NI')
)

100%|██████████| 20/20 [00:00<00:00, 37.12it/s]
100%|██████████| 11/11 [00:01<00:00,  8.42it/s]


Aggregate the results into one time series for each distribution

In [None]:
# Sum each layout over the spatial dimensions to get one capacity value
# per time step.
capacity_series_ie, capacity_series_ni = (
    regional_layout.sum(dim=['x','y'])
    for regional_layout in (capacity_series_ie, capacity_series_ni)
)

### Missing values

The C3S-E national and subnational data are missing the last two hours (22:00 and 23:00) of the 31st of December 2019. We add these time steps here and set them to NaN in order to facilitate further processing.

In [None]:
# Add the two missing time steps as NaN to both tables ('IE' column of the
# national data, 'UKN0' column of the subnational data).
for missing_hour in ('2019-12-31T22:00', '2019-12-31T23:00'):
    df_nat.loc[pd.to_datetime(missing_hour), 'IE'] = np.nan
    df_sub.loc[pd.to_datetime(missing_hour), 'UKN0'] = np.nan

Re-sort the data by time index to avoid future issues

In [None]:
# The rows added above were appended at the end; put both tables back in
# index order.
df_nat, df_sub = df_nat.sort_index(), df_sub.sort_index()

Finally we average the two time series with capacities as weights

In [None]:
# Capacity-weighted average of the national ('IE') and subnational ('UKN0')
# capacity factors.
# NOTE(review): pandas Series are combined with the capacity series here,
# presumably element by element in row order, which would need both sides to
# have the same length and time ordering; confirm.
cf_c3se_national = (
    df_nat['IE']*capacity_series_ie
    + df_sub['UKN0']*capacity_series_ni
) / (capacity_series_ie + capacity_series_ni)

## EirGrid

### Availability

In [None]:
# EirGrid generation data, indexed by the parsed dates of the first column.
eirgrid_qtr = pd.read_csv(
    '../Data_Final/EirGrid/generation_pv_1823_eir.csv',
    index_col=0,
    parse_dates=True,
)
# Keep the same verification window as every other source (2018 inclusive to
# 2024 exclusive). Any earlier rows would give this series a different length
# from the series it is later combined with.
eirgrid_qtr = eirgrid_qtr[("2018" <= eirgrid_qtr.index) & (eirgrid_qtr.index < "2024")]

In [None]:
# Hourly mean of the 'Availability' column divided by the total installed
# capacity (layout summed over x and y) at each time step.
# NOTE(review): the division presumably pairs values by position, so both
# series must cover the same hours in the same order; confirm.
cf_eirgrid = (
    eirgrid_qtr['Availability'].resample('1h').mean()
    / full_time_layout.sum(dim=['x','y'])
)

# Save data as csv file to be used again

In [None]:
# Gather the four capacity-factor series in one table indexed by the
# EirGrid time stamps, then write it to disk.
df = pd.DataFrame(
    {
        'time': cf_eirgrid.index,
        'EirGrid': cf_eirgrid.values,
        'Atlite': cf_atlite.values,
        'C3S National': cf_c3se_national.values,
        'C3S Gridded': cf_c3se_gridded.values,
    }
).set_index('time')
df.to_csv('../Data_Final/Verification/verification_cf_pv_1823.csv')