This code generates the EURO2021 and EURO2025 boundaries used for zonal statistics.

- EURO2021 (2243): NUTS 2021 (2010) + ITL 2021 (232) + 1  
- EURO2025 (2039): NUTS 2024 (1798) + ITL 2025 (240) + 1  

Data wrangling steps: 
1. Read in NUTS (EPSG:3035; drop 'NAME_LATN', 'MOUNT_TYPE', 'URBN_TYPE', 'COAST_TYPE')
and ITL (EPSG:27700; drop 'BNG_E', 'BNG_N', 'LONG', 'LAT', 'GlobalID') separately
2. Change both CRS to Equal earth (EPSG:8857, ref: https://epsg.io/8857)
3. Build the UK (level 0) boundary from ITL for 2021 and 2025, and make sure its code does not clash with the NUTS code `UK` (the ITL level-0 row is given the code `ITL`)
> note: EURO can therefore contain two complete (level 0 to level 3) sets of UK data: one from NUTS and one from ITL
4. Export as GeoJSON in EPSG:8857, and as a Shapefile in EPSG:4326 for GEE

In [1]:
import re
import os
import gc
import json
import fiona
import shutil
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import geopandas as gpd
from pathlib import Path
from osgeo import gdal, osr
import dask_geopandas as dgpd
from shapely.geometry import shape
from rasterstats import zonal_stats
import geopandas as gpd
import fiona

# Machine-specific absolute paths used by this notebook.
BASE_DIR = Path("/Users/wenlanzhang/PycharmProjects/Mapineq/src/data-wrangling/")
DATA_DIR = Path("/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford")

# CRS that every layer is reprojected to: Equal Earth
target_crs = 'EPSG:8857'


# Load ITL and NUTS

## Load ITL

In [2]:
# Locations of the ITL 2021 and ITL 2025 shapefiles under DATA_DIR
itl_2021_path = DATA_DIR / "ITL/itl_2021_BGC.shp"
itl_2025_path = DATA_DIR / "ITL/itl_2025_BGC.shp"

# For each year: load the shapefile, discard the columns that are not needed,
# and add a constant CNTR_CODE column set to 'UK'.
itl_2021 = (
    gpd.read_file(itl_2021_path)
    .drop(columns=['BNG_E', 'BNG_N', 'LONG', 'LAT', 'GlobalID'])
    .assign(CNTR_CODE='UK')
)
itl_2025 = (
    gpd.read_file(itl_2025_path)
    .drop(columns=['BNG_E', 'BNG_N', 'LONG', 'LAT', 'GlobalID'])
    .assign(CNTR_CODE='UK')
)

# Show the 2021 layer for inspection
itl_2021
# itl_2025.sort_values(by="ITL_CODE")

Unnamed: 0,ITL_CODE,ITL_NAME,ITL_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((402039.597 506235.701, 401929....",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((364516.901 338994.103, 364347....",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((450524.201 378564.296, 450416....",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((455182.298 231283.095, 455000....",UK
4,TLG,West Midlands (England),1,"POLYGON ((403387.797 368314.9, 403515.999 3682...",UK
...,...,...,...,...,...
227,TLN0E,Lisburn and Castlereagh,3,"POLYGON ((138843.75 530937.408, 138876.343 530...",UK
228,TLN0F,Mid and East Antrim,3,"POLYGON ((112237.347 552083.778, 112276.828 55...",UK
229,TLN0G,Fermanagh and Omagh,3,"POLYGON ((80027.441 554144.198, 80556.966 5533...",UK
230,TLK24,"Bournemouth, Christchurch and Poole",3,"MULTIPOLYGON (((403839.945 87004.556, 403705.2...",UK


In [3]:
# Bring both ITL layers into the shared target CRS
itl_2021, itl_2025 = (layer.to_crs(target_crs) for layer in (itl_2021, itl_2025))

In [4]:
# --- Report the CRS of each ITL layer and whether they agree ---
itl_crs_2021, itl_crs_2025 = itl_2021.crs, itl_2025.crs
print("ITL 2021 CRS:", itl_crs_2021)
print("ITL 2025 CRS:", itl_crs_2025)

print("CRS match:", itl_crs_2021 == itl_crs_2025)

ITL 2021 CRS: EPSG:8857
ITL 2025 CRS: EPSG:8857
CRS match: True


In [5]:
# # ------------- Check the boundary ------------- 
# import matplotlib.pyplot as plt
# from shapely.geometry import box

# # Optional: reproject to match if needed
# if itl_2021.crs != itl_2025.crs:
#     itl_2025 = itl_2025.to_crs(itl_2021.crs)

# # Plot both shapefiles
# fig, ax = plt.subplots(figsize=(10, 10))
# itl_2021.plot(ax=ax, edgecolor='blue', facecolor='none', label='ITL 2021')
# itl_2025.plot(ax=ax, edgecolor='red', facecolor='none', linestyle='--', label='ITL 2025')
# # plt.legend()
# plt.title("ITL 2021 (blue) vs ITL 2025 (red dashed)")
# plt.show()

## Load NUTS

In [5]:
# Read the NUTS 2021 and NUTS 2024 GeoJSON files
NUTS_2021 = gpd.read_file(DATA_DIR/"NUTS/NUTS_RG_01M_2021_3035.geojson")
NUTS_2024 = gpd.read_file(DATA_DIR/"NUTS/NUTS_RG_01M_2024_3035.geojson")

# Attribute columns that are not needed downstream; each label is listed once
# (the previous lists repeated 'MOUNT_TYPE').
nuts_drop_columns = ['NAME_LATN', 'MOUNT_TYPE', 'URBN_TYPE', 'COAST_TYPE']
NUTS_2021 = NUTS_2021.drop(columns=nuts_drop_columns)
NUTS_2024 = NUTS_2024.drop(columns=nuts_drop_columns)

# Show the 2021 layer for inspection
NUTS_2021

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NUTS_NAME,geometry
0,AL,0,AL,Shqipëria,"MULTIPOLYGON (((5121233.536 2221719.441, 51208..."
1,CZ,0,CZ,Česko,"POLYGON ((4624843.654 3112209.741, 4625546.618..."
2,DE,0,DE,Deutschland,"MULTIPOLYGON (((4355225.365 2715902.993, 43541..."
3,DK,0,DK,Danmark,"MULTIPOLYGON (((4650502.736 3591342.844, 46503..."
4,CY,0,CY,Κύπρος,"MULTIPOLYGON (((6527040.718 1762367.593, 65267..."
...,...,...,...,...,...
2005,NO0B1,3,NO,Jan Mayen,"POLYGON ((3623747.621 5400386.841, 3624031.138..."
2006,EE009,3,EE,Kesk-Eesti,"MULTIPOLYGON (((5216227.688 4159212.769, 52172..."
2007,NO0,1,NO,Norge,"MULTIPOLYGON (((4961367.759 5413266.131, 49622..."
2008,NO0B,2,NO,Jan Mayen and Svalbard,"MULTIPOLYGON (((4744650.828 6379141.635, 47446..."


In [6]:
# Bring both NUTS layers into the shared target CRS
NUTS_2021, NUTS_2024 = (layer.to_crs(target_crs) for layer in (NUTS_2021, NUTS_2024))

# --- Report the CRS of each NUTS layer and whether they agree ---
nuts_crs_2021, nuts_crs_2024 = NUTS_2021.crs, NUTS_2024.crs
print("NUTS 2021 CRS:", nuts_crs_2021)
print("NUTS 2024 CRS:", nuts_crs_2024)

print("CRS match:", nuts_crs_2021 == nuts_crs_2024)

NUTS 2021 CRS: EPSG:8857
NUTS 2024 CRS: EPSG:8857
CRS match: True


In [7]:
# Look up the country-level (level 0) UK row in NUTS 2021

NUTS_2021.loc[NUTS_2021['CNTR_CODE'].eq('UK') & NUTS_2021['LEVL_CODE'].eq(0)]

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NUTS_NAME,geometry
60,UK,0,UK,United Kingdom,"MULTIPOLYGON (((-249800.512 6805458.936, -2495..."


# Get boundary for UK (level0)

In [9]:
# Reproject both ITL layers to the target CRS, then show the resulting CRS
itl_2021, itl_2025 = (layer.to_crs(target_crs) for layer in (itl_2021, itl_2025))
itl_2021.crs

<Projected CRS: EPSG:8857>
Name: WGS 84 / Equal Earth Greenwich
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Coordinate Operation:
- name: Equal Earth Greenwich
- method: Equal Earth
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [11]:
from shapely.ops import unary_union

# Dissolve every ITL level-1 region into a single UK-wide geometry
level_1_2021 = itl_2021.loc[itl_2021['ITL_LEVEL'].eq(1)]
merged_geometry = unary_union(level_1_2021.geometry)

# One-row GeoDataFrame holding the level-0 UK record, in the same CRS as
# the level-1 regions it was built from
new_row = gpd.GeoDataFrame(
    {
        'ITL_CODE': ['ITL'],
        'ITL_NAME': ['United Kingdom'],
        'ITL_LEVEL': [0],
        'CNTR_CODE': ['UK'],
        'geometry': [merged_geometry],
    },
    crs=level_1_2021.crs,
)

# ITL 2021 with the level-0 row appended at the end
itl_2021_0 = pd.concat([itl_2021, new_row], ignore_index=True)

itl_2021_0

Unnamed: 0,ITL_CODE,ITL_NAME,ITL_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-150023.597 6421565.781, -1501...",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-195202.511 6277323.483, -1953...",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-95651.593 6311598.73, -95775....",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((-93223.092 6182667.126, -93428...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-150190.618 6302937.743, -150043.155...",UK
...,...,...,...,...,...
228,TLN0F,Mid and East Antrim,3,"POLYGON ((-491777.644 6452716.713, -491783.497...",UK
229,TLN0G,Fermanagh and Omagh,3,"POLYGON ((-529839.115 6452618.654, -529180.739...",UK
230,TLK24,"Bournemouth, Christchurch and Poole",3,"MULTIPOLYGON (((-153158.042 6054365.176, -1533...",UK
231,TLK25,Dorset,3,"MULTIPOLYGON (((-193514.723 6037399.646, -1935...",UK


In [12]:
# Dissolve every ITL 2025 level-1 region into a single UK-wide geometry
level_1_2025 = itl_2025.loc[itl_2025['ITL_LEVEL'].eq(1)]
merged_geometry_2025 = unary_union(level_1_2025.geometry)

# One-row GeoDataFrame holding the level-0 UK record, in the ITL 2025 CRS
new_row_2025 = gpd.GeoDataFrame(
    {
        'ITL_CODE': ['ITL'],
        'ITL_NAME': ['United Kingdom'],
        'ITL_LEVEL': [0],
        'CNTR_CODE': ['UK'],
        'geometry': [merged_geometry_2025],
    },
    crs=itl_2025.crs,
)

# ITL 2025 with the level-0 row appended at the end
itl_2025_0 = pd.concat([itl_2025, new_row_2025], ignore_index=True)
itl_2025_0

Unnamed: 0,ITL_CODE,ITL_NAME,ITL_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-92979.343 6438074.46, -93140....",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-237283.047 6307176.377, -2372...",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-42588.825 6348997.161, -41892...",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((20764.971 6264583.126, 20664.3...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-103473.72 6201427.643, -103567.255 ...",UK
...,...,...,...,...,...
236,TLN0D,Antrim and Newtownabbey,3,"POLYGON ((-446001.709 6444035.837, -446350.658...",UK
237,TLN0E,Lisburn and Castlereagh,3,"POLYGON ((-459814.875 6436192.627, -459774.891...",UK
238,TLN0F,Mid and East Antrim,3,"POLYGON ((-446001.709 6444035.837, -446006.781...",UK
239,TLN0G,Fermanagh and Omagh,3,"POLYGON ((-527029.551 6452031.269, -526758.281...",UK


In [13]:
# Confirm the appended level-0 row (code 'ITL') is present
itl_2025_0.loc[itl_2025_0['ITL_CODE'].eq('ITL')]

Unnamed: 0,ITL_CODE,ITL_NAME,ITL_LEVEL,geometry,CNTR_CODE
240,ITL,United Kingdom,0,"MULTIPOLYGON (((-570184.256 6638699.647, -5701...",UK


# Combine

## 2021

In [14]:
# Display itl_2025_0 (ITL 2025 with the appended level-0 UK row) for inspection;
# the NUTS_2021 preview below is left commented out.
# NUTS_2021
itl_2025_0

Unnamed: 0,ITL_CODE,ITL_NAME,ITL_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-92979.343 6438074.46, -93140....",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-237283.047 6307176.377, -2372...",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-42588.825 6348997.161, -41892...",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((20764.971 6264583.126, 20664.3...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-103473.72 6201427.643, -103567.255 ...",UK
...,...,...,...,...,...
236,TLN0D,Antrim and Newtownabbey,3,"POLYGON ((-446001.709 6444035.837, -446350.658...",UK
237,TLN0E,Lisburn and Castlereagh,3,"POLYGON ((-459814.875 6436192.627, -459774.891...",UK
238,TLN0F,Mid and East Antrim,3,"POLYGON ((-446001.709 6444035.837, -446006.781...",UK
239,TLN0G,Fermanagh and Omagh,3,"POLYGON ((-527029.551 6452031.269, -526758.281...",UK


In [15]:
# --- Harmonise column names: ITL -> EURO_* ---
itl_2021_0 = itl_2021_0.rename(
    columns=dict(ITL_CODE="EURO_CODE", ITL_NAME="EURO_NAME", ITL_LEVEL="EURO_LEVEL")
)

# --- Harmonise column names: NUTS -> EURO_* ---
NUTS_2021 = NUTS_2021.rename(
    columns=dict(NUTS_ID="EURO_CODE", NUTS_NAME="EURO_NAME", LEVL_CODE="EURO_LEVEL")
)

# --- Stack ITL on top of NUTS and wrap the result as a GeoDataFrame,
#     taking the CRS from the ITL 2021 layer ---
combined_2021 = gpd.GeoDataFrame(
    pd.concat([itl_2021_0, NUTS_2021], ignore_index=True),
    geometry="geometry",
    crs=itl_2021_0.crs,
)

combined_2021

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-150023.597 6421565.781, -1501...",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-195202.511 6277323.483, -1953...",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-95651.593 6311598.73, -95775....",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((-93223.092 6182667.126, -93428...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-150190.618 6302937.743, -150043.155...",UK
...,...,...,...,...,...
2238,NO0B1,Jan Mayen,3,"POLYGON ((-585620.093 7743197.571, -585306.164...",NO
2239,EE009,Kesk-Eesti,3,"MULTIPOLYGON (((1884405.458 6891313.753, 18856...",EE
2240,NO0,Norge,1,"MULTIPOLYGON (((1781507.126 7758918.947, 17830...",NO
2241,NO0B,Jan Mayen and Svalbard,2,"MULTIPOLYGON (((1881228.3 8208517.602, 1882472...",NO


In [16]:
# List the country-level (level 0) rows of the combined 2021 layer
combined_2021.loc[combined_2021['EURO_LEVEL'].eq(0)]

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
232,ITL,United Kingdom,0,"MULTIPOLYGON (((-570192.843 6638671.643, -5702...",UK
233,AL,Shqipëria,0,"MULTIPOLYGON (((1650425.741 5211648.283, 16499...",AL
234,CZ,Česko,0,"POLYGON ((1124307.66 6090003.519, 1125097.315 ...",CZ
235,DE,Deutschland,0,"MULTIPOLYGON (((843062.368 5735775.013, 842061...",DE
236,DK,Danmark,0,"MULTIPOLYGON (((1147917.715 6503373.5, 1147753...",DK
237,CY,Κύπρος,0,"MULTIPOLYGON (((3015419.669 4434050.531, 30150...",CY
238,BE,Belgique/België,0,"MULTIPOLYGON (((378499.891 6131545.983, 378245...",BE
239,BG,България,0,"POLYGON ((1873829.714 5382911.359, 1875271.231...",BG
240,CH,Schweiz/Suisse/Svizzera,0,"POLYGON ((689976.87 5761662.831, 690334.538 57...",CH
241,AT,Österreich,0,"MULTIPOLYGON (((842061.301 5732543.425, 843062...",AT


## 2025 

In [17]:
# --- For ITL dataset: harmonise column names to the shared EURO_* schema ---
itl_2025_0 = itl_2025_0.rename(columns={
    "ITL_CODE": "EURO_CODE",
    "ITL_NAME": "EURO_NAME",
    "ITL_LEVEL": "EURO_LEVEL"
})

# --- For NUTS dataset: harmonise column names to the shared EURO_* schema ---
NUTS_2024 = NUTS_2024.rename(columns={
    "NUTS_ID": "EURO_CODE",
    "NUTS_NAME": "EURO_NAME",
    "LEVL_CODE": "EURO_LEVEL"
})

# --- Concatenate into a single GeoDataFrame (ITL rows first, then NUTS) ---
combined_2025 = pd.concat([itl_2025_0, NUTS_2024], ignore_index=True)

# --- Convert back to GeoDataFrame ---
# Take the CRS from the 2025 ITL layer that is actually being combined here,
# so this cell no longer depends on the 2021 variable itl_2021_0.
combined_2025 = gpd.GeoDataFrame(combined_2025, geometry="geometry", crs=itl_2025_0.crs)

combined_2025

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-92979.343 6438074.46, -93140....",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-237283.047 6307176.377, -2372...",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-42588.825 6348997.161, -41892...",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((20764.971 6264583.126, 20664.3...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-103473.72 6201427.643, -103567.255 ...",UK
...,...,...,...,...,...
2034,RO,România,0,"MULTIPOLYGON (((2142328.862 5807825.923, 21432...",RO
2035,NO,Norge,0,"MULTIPOLYGON (((1783587.536 7757929.018, 17831...",NO
2036,PL,Polska,0,"MULTIPOLYGON (((1388381.264 6457828.345, 13910...",PL
2037,PT,Portugal,0,"MULTIPOLYGON (((-686422.937 5157351.023, -6867...",PT


In [18]:
# List the country-level (level 0) rows of the combined 2025 layer
combined_2025.loc[combined_2025['EURO_LEVEL'].eq(0)]

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
240,ITL,United Kingdom,0,"MULTIPOLYGON (((-570184.256 6638699.647, -5701...",UK
1995,EL,Ελλάδα,0,"MULTIPOLYGON (((2576551.636 4480196.117, 25764...",EL
1996,ES,España,0,"MULTIPOLYGON (((347746.578 4927919.43, 348590....",ES
1997,FI,Suomi/Finland,0,"MULTIPOLYGON (((1811426.216 7692927.655, 18174...",FI
1998,FR,France,0,"MULTIPOLYGON (((5145544.442 -2652108.197, 5146...",FR
1999,HR,Hrvatska,0,"MULTIPOLYGON (((1330343.362 5630462.009, 13304...",HR
2000,EE,Eesti,0,"MULTIPOLYGON (((1864339.98 6894400.552, 186456...",EE
2001,DE,Deutschland,0,"MULTIPOLYGON (((843062.356 5735775.016, 842719...",DE
2002,DK,Danmark,0,"MULTIPOLYGON (((1147654.269 6503666.403, 11480...",DK
2003,BA,Bosna I Hercegovina,0,"MULTIPOLYGON (((1390432.116 5494167.055, 13901...",BA


# Export

In [20]:
# Level-0 rows for the UK in the combined 2025 layer
combined_2025.loc[combined_2025['CNTR_CODE'].eq('UK') & combined_2025['EURO_LEVEL'].eq(0)]

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
240,ITL,United Kingdom,0,"MULTIPOLYGON (((-570184.256 6638699.647, -5701...",UK


In [24]:
# Reproject here so this check also works when the notebook is run
# top-to-bottom: the export cell that defines combined_2021_4326 comes later
# in the file, so referencing it at this point would raise a NameError.
combined_2021_4326 = combined_2021.to_crs("EPSG:4326")

# Level-0 rows for the UK in the combined 2021 layer (EPSG:4326)
combined_2021_4326[(combined_2021_4326['CNTR_CODE']=='UK') & (combined_2021_4326['EURO_LEVEL']==0)]

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
232,ITL,United Kingdom,0,"MULTIPOLYGON (((-7.6498 56.7838, -7.65075 56.7...",UK
293,UK,United Kingdom,0,"MULTIPOLYGON (((-3.41143 58.6402, -3.40836 58....",UK


In [21]:
# Create the output folder if it is missing, so the writes below do not fail
# on a machine where DATA_DIR/"new" has not been created yet.
(DATA_DIR/"new").mkdir(parents=True, exist_ok=True)

# Save as Shapefile, reprojected to EPSG:4326
combined_2021_4326 = combined_2021.to_crs("EPSG:4326")
combined_2021_4326.to_file(DATA_DIR/"new/EURO_2021_BGC.shp")

# Save as GeoJSON, keeping the CRS of combined_2021
combined_2021.to_file(DATA_DIR/"new/EURO_2021_BGC.geojson", driver="GeoJSON")

In [22]:
# Create the output folder if it is missing, so the writes below do not fail
# on a machine where DATA_DIR/"new" has not been created yet.
(DATA_DIR/"new").mkdir(parents=True, exist_ok=True)

# Save as Shapefile, reprojected to EPSG:4326
combined_2025_4326 = combined_2025.to_crs("EPSG:4326")
combined_2025_4326.to_file(DATA_DIR/"new/EURO_2025_BGC.shp")

# Save as GeoJSON, keeping the CRS of combined_2025
combined_2025.to_file(DATA_DIR/"new/EURO_2025_BGC.geojson", driver="GeoJSON")

In [25]:
# Display the EPSG:4326 copy of the combined 2025 layer (the version written to the Shapefile)
combined_2025_4326

Unnamed: 0,EURO_CODE,EURO_NAME,EURO_LEVEL,geometry,CNTR_CODE
0,TLC,North East (England),1,"MULTIPOLYGON (((-1.22289 54.62587, -1.225 54.6...",UK
1,TLD,North West (England),1,"MULTIPOLYGON (((-3.08309 53.25581, -3.0828 53....",UK
2,TLE,Yorkshire and The Humber,1,"MULTIPOLYGON (((-0.55549 53.69052, -0.54641 53...",UK
3,TLF,East Midlands (England),1,"MULTIPOLYGON (((0.26877 52.81586, 0.26747 52.8...",UK
4,TLG,West Midlands (England),1,"POLYGON ((-1.3319 52.16849, -1.33309 52.16735,...",UK
...,...,...,...,...,...
2034,RO,România,0,"MULTIPOLYGON (((26.70918 48.25289, 26.71999 48...",RO
2035,NO,Norge,0,"MULTIPOLYGON (((27.73163 71.09837, 27.72067 71...",NO
2036,PL,Polska,0,"MULTIPOLYGON (((18.29486 54.83511, 18.33 54.83...",PL
2037,PT,Portugal,0,"MULTIPOLYGON (((-8.18961 42.13714, -8.19285 42...",PT
