In [107]:
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import Point
from sklearn.neighbors import BallTree
import numpy as np

In [None]:
# Wave-energy site table. Data based on the OES GIS map tool:
# https://www.ocean-energy-systems.org/ocean-energy/gis-map-tool/
# Later cells read the 'geometry', 'country' and 'capacity_mw' columns of this sheet.
df = pd.read_excel('WaveEnergy_sites.xlsx', sheet_name='Final_Data')

In [109]:
# NUTS level-2 regions from the 2024 release (noted by the original author as
# not containing the UK).
nuts2_eu_path = 'NUTS_RG_60M_2024_3035.geojson'
nuts2_eu_gdf = gpd.read_file(nuts2_eu_path)
nuts2_eu_gdf = nuts2_eu_gdf[nuts2_eu_gdf['LEVL_CODE'] == 2]

# UK level-2 regions are taken from the 2021 release instead.
nuts2_gb_path = 'NUTS_RG_60M_2021_3035.geojson'
nuts2_gb_gdf = gpd.read_file(nuts2_gb_path)
nuts2_gb_gdf = nuts2_gb_gdf[
    (nuts2_gb_gdf['LEVL_CODE'] == 2) & (nuts2_gb_gdf['CNTR_CODE'] == 'UK')
]

# Stack both sets, keep the first occurrence of each NUTS_ID, and reproject to
# lon/lat (EPSG:4326) so the regions share a CRS with the site points.
nuts2_gdf = (
    pd.concat([nuts2_eu_gdf, nuts2_gb_gdf], ignore_index=True)
    .drop_duplicates(subset=['NUTS_ID'])
    .to_crs("EPSG:4326")
)

In [110]:
# Parse the WKT strings into shapely geometries on a copy (``assign``) rather
# than overwriting ``df['geometry']``: ``df`` keeps its raw strings, so this
# cell can be re-run without feeding already-parsed geometries to ``wkt.loads``.
gdf_points = gpd.GeoDataFrame(
    df.assign(geometry=df['geometry'].apply(wkt.loads)),
    geometry='geometry',
    crs='EPSG:4326',
)

In [111]:
def find_nearest_nuts2(point, country_code, nuts2_gdf):
    """Return the NUTS_ID of the same-country region whose centroid is closest to ``point``.

    "Closest" is the great-circle (haversine) distance between ``point`` and
    each candidate region's centroid; it is not the distance to the region's
    boundary and does not test whether the point lies inside a region.

    Parameters
    ----------
    point : geometry with ``x``/``y``/``is_empty`` attributes (e.g. a shapely Point)
        Site location; ``x`` is used as longitude and ``y`` as latitude in degrees.
    country_code : str
        Value compared against the ``CNTR_CODE`` column of ``nuts2_gdf``.
        A code that matches no row (including NaN/None) yields ``None``.
    nuts2_gdf : geopandas.GeoDataFrame
        Regions with ``CNTR_CODE`` and ``NUTS_ID`` columns and a CRS set.

    Returns
    -------
    The ``NUTS_ID`` of the nearest candidate, or ``None`` when the point is
    missing/empty or no region exists for ``country_code``.
    """
    # A missing or empty geometry has no coordinates to query with.
    if point is None or point.is_empty:
        return None

    # Restrict the search to regions of the same country.
    candidates = nuts2_gdf[nuts2_gdf['CNTR_CODE'] == country_code]
    if candidates.empty:
        return None

    # Compute centroids in a projected CRS (EPSG:3035), then convert them back
    # to lon/lat for the haversine query.
    centroids = candidates.to_crs(epsg=3035).geometry.centroid.to_crs(epsg=4326)

    # BallTree's haversine metric expects (lat, lon) pairs in radians.
    centroid_latlon = np.column_stack([centroids.y, centroids.x])
    tree = BallTree(np.radians(centroid_latlon), metric='haversine')
    _, nearest = tree.query(np.radians([[point.y, point.x]]), k=1)

    return candidates.iloc[nearest[0][0]]['NUTS_ID']

In [112]:

# Country names as they appear in the site table -> CNTR_CODE values used in
# the NUTS data.
country_map = {
    "United Kingdom": "UK",
    "Germany": "DE",
    "France": "FR",
    "Spain": "ES",
    "Italy": "IT",
    "Netherlands": "NL",
    "Belgium": "BE",
}

country_raw = gdf_points['country']
# Values that are already codes are kept as-is, so re-running this cell does
# not turn the whole column into NaN.
already_coded = country_raw.isin(list(country_map.values()))

# Names with no entry in country_map become NaN below and therefore never get
# a NUTS2_ID; report them instead of dropping them silently.
unmapped = country_raw[~already_coded & ~country_raw.isin(list(country_map))].dropna().unique()
if len(unmapped) > 0:
    print(f"No CNTR_CODE mapping for: {list(unmapped)}")

gdf_points['country'] = country_raw.map(country_map).where(~already_coded, country_raw)


In [113]:
# Look up, for every site, the NUTS2 region of its country with the closest
# centroid; sites without a match receive None.
gdf_points['NUTS2_ID'] = [
    find_nearest_nuts2(site_geom, site_country, nuts2_gdf)
    for site_geom, site_country in zip(gdf_points['geometry'], gdf_points['country'])
]

In [114]:
# Total capacity per NUTS2 region, with the summed column renamed.
# Rows whose NUTS2_ID is missing are left out by groupby's default behaviour.
agg_capacity = (
    gdf_points
    .groupby('NUTS2_ID', as_index=False)['capacity_mw']
    .sum()
    .rename(columns={'capacity_mw': 'total_capacity_mw'})
)

# Drop regions whose total is not strictly positive.
has_capacity = agg_capacity['total_capacity_mw'] > 0
agg_capacity = agg_capacity[has_capacity]

# Resulting table holds just NUTS2_ID and total_capacity_mw.
print(agg_capacity)

  NUTS2_ID  total_capacity_mw
0     ES21              0.300
1     FRH0            240.000
2     ITH5              0.050
3     ITI1              0.050
4     NL12              0.050
5     UKM5              6.300
6     UKM6              0.022


In [115]:
# Export the per-region totals; ';' as field separator and ',' as decimal mark.
agg_capacity.to_csv(
    "aggregated_oceanenergy_capacity_by_nuts2.csv",
    sep=';',
    decimal=',',
    index=False,
)
