In [None]:
###
# @Author             : Monserrat López
# @Date               : 2025-03-05
# @Last Modified Date : 2025-04-25
# @Description        : Estimation of data center annual energy consumption across the EU 
#                       using area-based scenarios adapted from Jerléus et al. (2024). 
#                       Includes EU-wide, country-level, and data center type-level aggregations.
# @Note               : This script is intended for academic research purposes only.
#                       Some original raw data collected during the research is not included in this repository for confidentiality reasons.
###

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

### Estimations of DC Energy Consumption

In [None]:
# Plot defaults: seaborn white-grid theme and a 12x8 default figure size
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Read the classified data center table (CSV) into df
df = pd.read_csv('../output/09_classified_DC_EU27.csv')

In [None]:
# Coverage of the whitespace area column: rows without / with a value
print(df['whitespace_sqm'].isnull().sum())  # rows where the area is missing
print(df['whitespace_sqm'].count())  # rows where the area is present

In [None]:
# Area Class Assignment from Jerléus et al.
def classify_area_bracket(area):
    """Map a whitespace area to its size-class letter.

    Parameters
    ----------
    area : float
        Whitespace area, compared against thresholds expressed in m².

    Returns
    -------
    str or None
        'A' (< 9.3), 'B' (< 92.82), 'C' (< 185.7), 'D' (< 1858) or 'E'
        (everything else); None when ``area`` is missing.
    """
    if pd.isna(area):
        return None

    # Exclusive upper bounds in ascending order; the first bound the area
    # falls below decides the class.
    upper_bounds = ((9.3, 'A'), (92.82, 'B'), (185.7, 'C'), (1858, 'D'))
    for limit, label in upper_bounds:
        if area < limit:
            return label

    # At or above the largest bound
    return 'E'
    
# Tag every row with its size class; rows without an area get None
df['area_class'] = df['whitespace_sqm'].map(classify_area_bracket)

In [None]:
# Define scenario table
# Keys are the area classes assigned by classify_area_bracket ('A'–'E').
#   'itd' : IT power density used by compute_energy_bounds; the kW -> MW
#           conversion there implies kW per m² of whitespace — TODO confirm
#           against the source paper.
#   'pue' : PUE factor (presumably power usage effectiveness) for each of the
#           'low' / 'mid' / 'high' scenarios.
# Class A has no 'itd' (None), so compute_energy_bounds returns None for
# class A rows unless the row's dc_type_final is 'hyperscale'.
scenario_table = {
    'A': {'itd': None, 'pue': {'low': 2.00, 'mid': 2.00, 'high': 2.00}},  # <9.3 m²
    'B': {'itd': 0.43, 'pue': {'low': 2.15, 'mid': 2.35, 'high': 2.55}},  # 9.3–92.82 m²
    'C': {'itd': 0.65, 'pue': {'low': 1.68, 'mid': 1.88, 'high': 2.08}},  # noted as 46.5–185.7 m², which overlaps B; classify_area_bracket assigns 92.82–185.7 m² to C — TODO confirm cut-off
    'D': {'itd': 0.86, 'pue': {'low': 1.59, 'mid': 1.79, 'high': 1.99}},  # 185.8–1858 m²
    'E': {'itd': 1.10, 'pue': {'low': 1.40, 'mid': 1.60, 'high': 1.80}},  # >1858 m²
}

In [None]:
### Energy estimation function
def compute_energy_bounds(row, scenario):
    """Estimate the annual energy use (MWh) of one row for a PUE scenario.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'whitespace_sqm', 'dc_type_final' and 'area_class'.
    scenario : str
        Key into the per-class 'pue' mapping of ``scenario_table``
        ('low', 'mid' or 'high').

    Returns
    -------
    float or None
        ``(itd / 1000) * pue * area * 8760``; None when the area or the area
        class is missing, or when the class has no itd / pue value.
    """
    whitespace = row['whitespace_sqm']
    facility_type = row['dc_type_final']
    bracket = row['area_class']

    # Without an area and its class there is nothing to estimate
    if pd.isnull(whitespace) or pd.isnull(bracket):
        return None

    if facility_type == 'hyperscale':
        # Hyperscale rows use fixed values, independent of class and scenario
        pue, itd = 1.13, 1.10
    else:
        class_params = scenario_table[bracket]
        pue = class_params['pue'][scenario]
        itd = class_params['itd']

    # Classes without a density (or PUE) yield no estimate
    if not (itd is not None and pue is not None):
        return None

    hours_per_year = 8760
    # itd / 1000 converts kW to MW; MW × hours/year gives MWh
    return (itd / 1000) * pue * whitespace * hours_per_year

In [None]:
## Per-row energy estimate for each PUE scenario: MWh column plus the same value in TWh
for scenario in ('low', 'mid', 'high'):
    df[f'energy_{scenario}'] = df.apply(compute_energy_bounds, axis=1, args=(scenario,))
    df[f'energy_{scenario}_twh'] = df[f'energy_{scenario}'].div(1e6)  # 1 TWh = 1e6 MWh

### Estimations

In [None]:
# Total of the per-row TWh estimates over the whole table, one line per scenario
print("\nEU-Level Annual Energy Use (TWh):")
for scenario in ('low', 'mid', 'high'):
    total_twh = df.loc[:, f'energy_{scenario}_twh'].sum()  # missing estimates are skipped by sum()
    print(f"{scenario.capitalize()}: {total_twh:.2f} TWh")

In [None]:
### Analysis

# --- National-level totals ---
# Sum each scenario's TWh over all rows of a country, then rank the
# countries by their mid-scenario total (largest first).
country_energy = (
    df.groupby('country_normalized')
    .agg({f'energy_{level}_twh': 'sum' for level in ('low', 'mid', 'high')})
    .reset_index()
    .sort_values(by='energy_mid_twh', ascending=False)
)

print(country_energy.head(10))  # Top 10 countries

In [None]:
# Round the three scenario columns to 2 decimal places
country_energy = country_energy.round(
    {f'energy_{level}_twh': 2 for level in ('low', 'mid', 'high')}
)

In [None]:
# Horizontal bar chart of the first 10 rows of country_energy
# (mid-scenario totals), drawn on an explicit figure/axes pair
top10 = country_energy.head(10)
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top10, y='country_normalized', x='energy_mid_twh', ax=ax)
ax.set_xlabel("Estimated Annual Energy Consumption (TWh)")
ax.set_ylabel("Country")
ax.set_title("Top 10 EU Countries by Data Center Energy Use")
fig.tight_layout()
plt.show()

In [None]:
# --- Total EU-level energy consumption (TWh/year) ---
# One total per scenario: the sum of the per-row TWh estimates over the
# whole table (rows without an estimate are skipped by sum()).
eu_energy_summary = {
    'Scenario': ['Low', 'Mid', 'High'],
    'Total Energy (TWh)': [
        df[f'energy_{level}_twh'].sum() for level in ('low', 'mid', 'high')
    ],
}

# pandas is already imported as pd in the notebook's import cell, so no
# re-import is needed here.
eu_energy_df = pd.DataFrame(eu_energy_summary)
print("EU-Level Data Center Energy Estimates (Area-Based):")
print(eu_energy_df)


In [None]:
# Write df (now carrying the area class and energy estimate columns) to CSV,
# UTF-8 encoded and without the row index
csv_filename = "../output/10_EC_DC_EU27.csv"
df.to_csv(path_or_buf=csv_filename, encoding="utf-8", index=False)

In [None]:
# Average per-row energy estimate (TWh/year) for each data center type,
# one column per scenario.
avg_energy_by_type = (
    df.groupby('dc_type_final')
    .agg({
        'energy_low_twh': 'mean',
        'energy_mid_twh': 'mean',
        'energy_high_twh': 'mean'
    })
    .reset_index()
    # Rank on the unrounded mid-scenario means: rounding first would make
    # types whose means tie at 3 decimals appear in arbitrary order.
    .sort_values(by='energy_mid_twh', ascending=False)
    # Round to 3 decimal places for readability (display only)
    .round(3)
)

print("Average Energy Use by Data Center Type (TWh/year):")
print(avg_energy_by_type)