- NUTS:
2003: 1891 rows
2006: 1931 rows
2010: 1920 rows
2013: 1951 rows
2016: 2016 rows
2021: 2010 rows
2024: 1798 rows
Total: 13517

- ITL:
2021: 232
2025: 240
Total: 472

- EURO:
EURO2021: 2243
EURO2025: 2039
Total: 4282

In [5]:
# Standard library
import os
import re
import gc
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path

# Geospatial
import h3
from osgeo import gdal, osr
import geopandas as gpd
import dask_geopandas as dgpd
from shapely.geometry import Point, shape
import rasterio
from rasterstats import zonal_stats

# Visualization
import plotly.express as px
import matplotlib.pyplot as plt

# Custom
from validator import validate

# Project paths
BASE_DIR = Path('/Users/wenlanzhang/PycharmProjects/Mapineq/src/data-wrangling/')
DATA_DIR = Path('/Users/wenlanzhang/Downloads/PhD_UCL/Data/Oxford')

# Accessibility to cities
data: https://developers.google.com/earth-engine/datasets/catalog/Oxford_MAP_accessibility_to_cities_2015_v1_0  
band: 'accessibility'

In [6]:
a11y_city_NUTS = pd.read_csv(DATA_DIR/'GEE/a11y/Accessibility_Zonal_Stats_NoGeom.csv')
a11y_city_NUTS = a11y_city_NUTS.drop(columns=['system:index', '.geo'])
a11y_city_NUTS

Unnamed: 0,geo,geo_source,max,mean,median,obsTime,stdDev
0,AT,NUTS2003,486,51.418883,46.510792,2015,30.774770
1,AT3,NUTS2003,486,54.411668,48.511643,2015,35.170900
2,AT1,NUTS2003,160,44.534237,43.000000,2015,23.420052
3,AT2,NUTS2003,414,53.830808,50.501134,2015,30.183735
4,AT34,NUTS2003,215,54.875849,54.000000,2015,22.304797
...,...,...,...,...,...,...,...
13512,SI037,NUTS2024,110,62.387505,61.000000,2015,19.411806
13513,SI038,NUTS2024,93,44.446849,42.000000,2015,17.158143
13514,SI043,NUTS2024,308,45.942191,42.500834,2015,18.158115
13515,SK041,NUTS2024,177,46.344584,45.000000,2015,22.320704


In [7]:
a11y_city_EURO = pd.read_csv(DATA_DIR/'GEE/a11y/Accessibility_Zonal_EURO.csv')
a11y_city_EURO = a11y_city_EURO.drop(columns=['system:index', '.geo'])
a11y_city_EURO

Unnamed: 0,geo,geo_source,max,mean,median,obsTime,stdDev
0,AL,EURO2021,377,73.045278,68.524183,2015,48.259802
1,AL0,EURO2021,377,73.045278,68.524183,2015,48.259802
2,AL01,EURO2021,353,79.397917,76.510863,2015,44.539164
3,AL02,EURO2021,221,40.854722,35.000000,2015,29.535970
4,AL03,EURO2021,377,80.132150,74.524139,2015,52.011281
...,...,...,...,...,...,...,...
4277,XK006,EURO2025,105,24.428005,19.000000,2015,18.653160
4278,XK007,EURO2025,178,20.491282,16.000000,2015,21.981900
4279,XK00,EURO2025,216,23.285947,17.000000,2015,23.768381
4280,XK0,EURO2025,216,23.285947,17.000000,2015,23.768381


In [8]:
a11y_city_ITL = pd.read_csv(DATA_DIR/'GEE/a11y/Accessibility_Zonal_ITL.csv')
a11y_city_ITL = a11y_city_ITL.drop(columns=['system:index', '.geo'])
a11y_city_ITL

Unnamed: 0,geo,geo_source,max,mean,median,obsTime,stdDev
0,TLC,ITL2021,91,24.798494,22.51231,2015,18.686063
1,TLD,ITL2021,91,16.367035,11.00000,2015,16.173867
2,TLE,ITL2021,69,14.438733,12.00000,2015,12.145465
3,TLF,ITL2021,51,12.011561,11.00000,2015,8.765051
4,TLG,ITL2021,46,8.598950,7.00000,2015,7.166869
...,...,...,...,...,...,...,...
467,TLN0C,ITL2025,94,31.174307,31.00000,2015,10.790155
468,TLN0D,ITL2025,64,13.852268,9.00000,2015,13.551279
469,TLN0E,ITL2025,37,4.741757,4.00000,2015,3.803307
470,TLN0F,ITL2025,79,19.869017,19.00000,2015,10.180384


In [11]:
a11y_city = pd.concat([a11y_city_NUTS, a11y_city_EURO, a11y_city_ITL])
a11y_city = a11y_city.rename(columns={'stdDev': 'std_dev'})
a11y_city

Unnamed: 0,geo,geo_source,max,mean,median,obsTime,std_dev
0,AT,NUTS2003,486,51.418883,46.510792,2015,30.774770
1,AT3,NUTS2003,486,54.411668,48.511643,2015,35.170900
2,AT1,NUTS2003,160,44.534237,43.000000,2015,23.420052
3,AT2,NUTS2003,414,53.830808,50.501134,2015,30.183735
4,AT34,NUTS2003,215,54.875849,54.000000,2015,22.304797
...,...,...,...,...,...,...,...
467,TLN0C,ITL2025,94,31.174307,31.000000,2015,10.790155
468,TLN0D,ITL2025,64,13.852268,9.000000,2015,13.551279
469,TLN0E,ITL2025,37,4.741757,4.000000,2015,3.803307
470,TLN0F,ITL2025,79,19.869017,19.000000,2015,10.180384


In [12]:
# Melt the dataframe to long format
a11y_city_long = a11y_city.melt(
    id_vars=['geo', 'geo_source', 'obsTime'],
    value_vars=['max', 'mean', 'median', 'std_dev'],
    var_name='statistic',
    value_name='obsValue'
)
a11y_city_long

Unnamed: 0,geo,geo_source,obsTime,statistic,obsValue
0,AT,NUTS2003,2015,max,486.000000
1,AT3,NUTS2003,2015,max,486.000000
2,AT1,NUTS2003,2015,max,160.000000
3,AT2,NUTS2003,2015,max,414.000000
4,AT34,NUTS2003,2015,max,215.000000
...,...,...,...,...,...
73079,TLN0C,ITL2025,2015,std_dev,10.790155
73080,TLN0D,ITL2025,2015,std_dev,13.551279
73081,TLN0E,ITL2025,2015,std_dev,3.803307
73082,TLN0F,ITL2025,2015,std_dev,10.180384


In [13]:
# a11y_city_long.to_csv(DATA_DIR/"GEE/a11y/Output/a11y_city.csv", index=True, index_label="id")

In [4]:
(13517 + 472 + 4282) * 4

73084

In [14]:
final = pd.read_csv(DATA_DIR/"GEE/a11y/Output/a11y_city.csv")
final

Unnamed: 0,id,geo,geo_source,obsTime,statistic,obsValue
0,0,AT,NUTS2003,2015,max,486.000000
1,1,AT3,NUTS2003,2015,max,486.000000
2,2,AT1,NUTS2003,2015,max,160.000000
3,3,AT2,NUTS2003,2015,max,414.000000
4,4,AT34,NUTS2003,2015,max,215.000000
...,...,...,...,...,...,...
73079,73079,TLN0C,ITL2025,2015,std_dev,10.790155
73080,73080,TLN0D,ITL2025,2015,std_dev,13.551279
73081,73081,TLN0E,ITL2025,2015,std_dev,3.803307
73082,73082,TLN0F,ITL2025,2015,std_dev,10.180384


In [15]:
validate(data=final)

Unnamed: 0,id,geo,geo_source,obsTime,statistic,obsValue
0,0,AT,NUTS2003,2015,max,486.000000
1,1,AT3,NUTS2003,2015,max,486.000000
2,2,AT1,NUTS2003,2015,max,160.000000
3,3,AT2,NUTS2003,2015,max,414.000000
4,4,AT34,NUTS2003,2015,max,215.000000
...,...,...,...,...,...,...
73079,73079,TLN0C,ITL2025,2015,std_dev,10.790155
73080,73080,TLN0D,ITL2025,2015,std_dev,13.551279
73081,73081,TLN0E,ITL2025,2015,std_dev,3.803307
73082,73082,TLN0F,ITL2025,2015,std_dev,10.180384
