In [None]:
###
# @Author             : Monserrat López
# @Date               : 2025-04-27
# @Last Modified Date : 2025-04-28
# @Description        : Anonymization of EU data center emissions GeoJSON. 
#                       Retains only non-sensitive attributes and applies random spatial jitter to coordinates.
# @Note               : This anonymized dataset is intended for public sharing alongside the thesis project.
#                       Original, non-anonymized data is not included in this repository for confidentiality reasons.
###

In [11]:
import geopandas as gpd
import numpy as np
from shapely.affinity import translate

In [3]:
# Read the complete, non-anonymized emissions dataset into a GeoDataFrame.
# `df` is the source that the later cells select columns and geometry from.
input_path = "../output/12_data_center_emissions.geojson"
df = gpd.read_file(input_path)

In [4]:
# List every column of the raw dataset so the non-sensitive subset can be chosen
print("Original columns:", list(df.columns), sep="\n")

Original columns:
['url', 'address', 'clean_address', 'country_iso2', 'country_normalized', 'city_normalized', 'website', 'description', 'specs', 'region', 'datacentername', 'operator_name', 'pue_estimate', 'power_built_out_mw', 'live_power_mw', 'whitespace_sqm', 'building_size_sqm', 'tier_level', 'latitude', 'longitude', 'address_correction_type', 'location_certainty', 'power_capacity_mw', 'building_area_sqm', 'cleaned_description', 'dc_type_rule', 'combined_text', 'processed_text', 'has_datacenter_word', 'has_cloud_word', 'has_colo_word', 'has_enterprise_word', 'dc_type_nlp', 'dc_type_final', 'ws_final', 'whitespace_source', 'area_class', 'energy_low', 'energy_low_twh', 'energy_mid', 'energy_mid_twh', 'energy_high', 'energy_high_twh', 'dc_id', 'index_right', 'zoneName', 'countryKey', 'countryName', 'n_hours_x', 'n_hours_y', 'avg_carbon_intensity_gco2_kwh', 'emissions_low_tons', 'emissions_mid_tons', 'emissions_high_tons', 'geometry']


In [17]:
# Whitelist of attributes considered safe to publish; every column not named
# here (addresses, names, URLs, raw coordinates, ids, ...) is dropped.
safe_columns = [
    'country_iso2',
    'country_normalized',
    'dc_type_final',
    'area_class',
    'energy_low_twh',
    'energy_mid_twh',
    'energy_high_twh',
    'avg_carbon_intensity_gco2_kwh',
    'emissions_low_tons',
    'emissions_mid_tons',
    'emissions_high_tons',
    'geometry',
]

# .copy() so that later edits to gdf_anon never write through to df
gdf_anon = df[safe_columns].copy()

In [15]:
# Add small random jitter to geometry (up to 0.01 coordinate units per axis;
# for lon/lat data 0.01 degrees is roughly 1 km in latitude, less in longitude
# away from the equator)
def jitter_geometry(geom, max_jitter=0.01):
    """Return ``geom`` shifted by an independent random offset on each axis.

    Parameters
    ----------
    geom : shapely geometry or None
        Geometry to displace. ``None`` and empty geometries are returned
        unchanged.
    max_jitter : float, default 0.01
        Bound of the uniform offset drawn for each axis; the x and y offsets
        are each sampled from ``[-max_jitter, max_jitter)`` in the units of
        the geometry's coordinates.

    Returns
    -------
    shapely geometry or None
        A translated copy of ``geom``, or ``geom`` itself when it is ``None``
        or empty.
    """
    # Test for None first: evaluating ``geom.is_empty`` on None raises
    # AttributeError, so the reversed order crashed on missing geometries.
    if geom is None or geom.is_empty:
        return geom

    # Offsets come from NumPy's global RNG. Do not publish a fixed seed next to
    # the anonymized data: the same offsets could then be regenerated and
    # subtracted to recover the original coordinates.
    return translate(
        geom,
        xoff=np.random.uniform(-max_jitter, max_jitter),
        yoff=np.random.uniform(-max_jitter, max_jitter),
    )

# Always jitter from the ORIGINAL geometry in `df`, not from gdf_anon itself:
# reading and overwriting the same column made every re-run of this cell stack
# another random offset on top of the previous one, so the displacement could
# grow past max_jitter. Sourcing from df keeps the cell safe to re-run.
gdf_anon['geometry'] = df['geometry'].apply(jitter_geometry)

In [16]:
# Preview the anonymized frame before it is written to disk.
# NOTE(review): the stored output of this cell lists a `dc_id` column that the
# column-selection cell does not keep, so the output appears to be stale —
# re-run the notebook from a fresh kernel and confirm what is displayed.
gdf_anon

Unnamed: 0,dc_id,country_iso2,country_normalized,dc_type_final,area_class,energy_low_twh,energy_mid_twh,energy_high_twh,avg_carbon_intensity_gco2_kwh,emissions_low_tons,emissions_mid_tons,emissions_high_tons,geometry
0,0,DE,Germany,colocation,D,0.017968,0.020228,0.022488,332.54,5974.957675,6726.524679,7478.091682,POINT (13.38815 52.45731)
1,1076,DE,Germany,enterprise,D,0.008385,0.009440,0.010494,332.54,2788.313582,3139.044850,3489.776118,POINT (7.43582 52.31083)
2,1048,DE,Germany,enterprise,D,0.008385,0.009440,0.010494,332.54,2788.313582,3139.044850,3489.776118,POINT (13.67939 51.06568)
3,1047,PL,Poland,enterprise,D,0.008385,0.009440,0.010494,703.19,5896.175581,6637.832887,7379.490192,POINT (20.92866 52.19257)
4,1046,AT,Austria,enterprise,D,0.008385,0.009440,0.010494,120.05,1006.606861,1133.224076,1259.841291,POINT (16.30213 48.13319)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,60,ES,Spain,enterprise,E,0.026981,0.030835,0.034690,123.35,3328.081680,3803.521920,4278.962160,POINT (-16.50833 28.07846)
1596,1508,NL,Netherlands,hyperscale,E,0.078943,0.078943,0.078943,272.93,21545.893885,21545.893885,21545.893885,POINT (6.865 53.4288)
1597,1316,ES,Spain,enterprise,D,0.002815,0.003169,0.003523,123.35,347.221571,390.897240,434.572910,POINT (-15.42636 28.15163)
1598,973,SK,Slovakia,enterprise,D,0.011978,0.013485,0.014992,120.05,1438.009801,1618.891537,1799.773273,POINT (16.96453 48.20131)


In [19]:
# Save the anonymized GeoJSON
#
# Safety check before publishing: re-running the column-selection cell rebuilds
# gdf_anon from df and silently restores the original coordinates. If the
# jitter cell is not run again afterwards, the "anonymized" file would contain
# the real locations. Refuse to write in that case.
_has_coords = df.geometry.notna() & ~df.geometry.is_empty  # None/empty geometries are never moved
_still_original = gdf_anon.geometry[_has_coords].geom_equals(df.geometry[_has_coords])
if _still_original.any():
    raise RuntimeError(
        f"{int(_still_original.sum())} geometries are identical to the source data; "
        "run the jitter cell after the column-selection cell before saving."
    )

gdf_anon.to_file('../output/data_center_emissions_anonymized.geojson', driver='GeoJSON')