# Datacenter Score Analysis

This notebook ingests recent market and weather data to compute a composite datacenter siting score across U.S. grid regions.

## Environment Setup
Ensure required dependencies are available for the workflow.

## Imports and Global Configuration


In [1]:
import json
import os
from datetime import datetime, timedelta
from functools import lru_cache
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import plotly.express as px
import matplotlib.pyplot as plt
from prophet import Prophet

plt.style.use('seaborn-v0_8')
px.defaults.template = 'plotly_white'

CACHE_DIR = Path('cache')
CACHE_DIR.mkdir(exist_ok=True)
EIA_BASE_URL = 'https://api.eia.gov/v2/electricity/rto/region-data/data/'
EIA_API_KEY = os.getenv('EIA_API_KEY')
HISTORICAL_DAYS = 90
PROPHET_LOOKBACK_DAYS = 60
FORECAST_HOURS = 24 * 7


## EIA Hourly Demand Fetching


In [2]:
def _cache_path_for_region(region: str) -> Path:
    return CACHE_DIR / f'eia_{region.lower()}_hourly.csv'


@lru_cache(maxsize=None)
def fetch_eia_hourly(region: str) -> pd.DataFrame:
    """Fetch the most recent 90 days of hourly demand for the requested EIA region.

    Parameters
    ----------
    region : str
        The EIA RTO/region code (e.g., 'CAL', 'TEX').

    Returns
    -------
    pd.DataFrame
        DataFrame with columns [datetime, demand_MW, region].
    """
    cache_path = _cache_path_for_region(region)
    end = datetime.utcnow()
    start = end - timedelta(days=HISTORICAL_DAYS)
    params = {
        'api_key': EIA_API_KEY,
        'data[0]': 'value',
        'facets[respondent][]': region,
        'frequency': 'hourly',
        'start': start.strftime('%Y-%m-%dT%H'),
        'end': end.strftime('%Y-%m-%dT%H'),
        'sort[0][column]': 'period',
        'sort[0][direction]': 'desc',
        'offset': 0,
        'length': 5000,
    }
    try:
        response = requests.get(EIA_BASE_URL, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()
        data = payload.get('response', {}).get('data', [])
        if not data:
            raise ValueError('Empty dataset returned from EIA API.')
        records = []
        for item in data:
            period = item.get('period')
            value = item.get('value')
            if period is None or value is None:
                continue
            records.append({
                'datetime': pd.to_datetime(period),
                'demand_MW': float(value),
                'region': region,
            })
        df = pd.DataFrame(records)
        if df.empty:
            raise ValueError('No valid records parsed from EIA response.')
        df = df.drop_duplicates(subset='datetime').sort_values('datetime')
        df = df[df['datetime'] >= start]
        df.to_csv(cache_path, index=False)
        return df
    except Exception as exc:
        print(f'EIA API fetch failed for {region}: {exc}')
        if cache_path.exists():
            print(f'Loading cached data for {region} from {cache_path}.')
            df = pd.read_csv(cache_path, parse_dates=['datetime'])
            return df
        raise


## Forecasting and Grid Metrics


In [3]:
def _prepare_hourly_series(df_region: pd.DataFrame) -> pd.DataFrame:
    if df_region.empty:
        raise ValueError('Region dataframe is empty.')
    df = df_region.copy()
    df = df.drop_duplicates(subset='datetime').sort_values('datetime')
    df = df.set_index('datetime')
    full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
    df = df.reindex(full_range)
    df['demand_MW'] = df['demand_MW'].interpolate(method='time')
    df['region'] = df_region['region'].iloc[0]
    return df


def forecast_peak_demand(df_region: pd.DataFrame) -> float:
    prepped = _prepare_hourly_series(df_region)
    recent_start = prepped.index.max() - timedelta(days=PROPHET_LOOKBACK_DAYS)
    df_recent = prepped[prepped.index >= recent_start]
    prophet_df = df_recent.reset_index().rename(columns={'index': 'ds', 'demand_MW': 'y'})
    model = Prophet(
        growth='flat',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=False
    )
    model.add_country_holidays(country_name='US')
    model.fit(prophet_df)
    future = model.make_future_dataframe(periods=FORECAST_HOURS, freq='H', include_history=False)
    forecast = model.predict(future)
    peak_forecast = float(forecast['yhat'].max())
    return peak_forecast


def compute_volatility(df_region: pd.DataFrame) -> float:
    prepped = _prepare_hourly_series(df_region)
    rolling_std = prepped['demand_MW'].rolling(window=24, min_periods=1).std()
    return float(rolling_std.iloc[-1])


def compute_renewable_proxy(df_region: pd.DataFrame) -> float:
    prepped = _prepare_hourly_series(df_region)
    mean_load = prepped['demand_MW'].mean()
    return float(1.0 / (1.0 + mean_load))


def compute_carbon_proxy(renewable_proxy: float) -> float:
    return float(1.0 - renewable_proxy)


## Weather Data via Open-Meteo


In [4]:
@lru_cache(maxsize=None)
def fetch_temperature(lat: float, lon: float) -> float:
    url = 'https://api.open-meteo.com/v1/forecast'
    params = {
        'latitude': lat,
        'longitude': lon,
        'daily': 'temperature_2m_mean',
        'past_days': 60,
        'timezone': 'UTC',
    }
    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        temps = data.get('daily', {}).get('temperature_2m_mean', [])
        if not temps:
            raise ValueError('Temperature series is empty.')
        return float(np.mean(temps))
    except Exception as exc:
        print(f'Open-Meteo fetch failed for ({lat}, {lon}): {exc}')
        return float('nan')


region_coords = {
    'CAL': (36.5, -119.5),
    'CAR': (35.5, -80.0),
    'CENT': (38.5, -94.5),
    'FLA': (28.0, -82.0),
    'MIDA': (39.0, -77.0),
    'MIDW': (42.0, -89.0),
    'NE': (42.5, -72.5),
    'NY': (42.9, -75.3),
    'NW': (45.5, -120.5),
    'SE': (33.0, -84.0),
    'SW': (36.0, -111.5),
    'TEN': (36.0, -86.0),
    'TEX': (31.0, -99.0),
}


## Compute Datacenter Scores


In [5]:
records = []
for region, (lat, lon) in region_coords.items():
    try:
        df_region = fetch_eia_hourly(region)
    except Exception as exc:
        print(f'Skipping region {region} due to data issues: {exc}')
        continue
    df_region = df_region.sort_values('datetime')
    latest_demand = float(df_region['demand_MW'].iloc[-1]) if not df_region.empty else float('nan')
    volatility = compute_volatility(df_region)
    peak = forecast_peak_demand(df_region)
    renewable_proxy = compute_renewable_proxy(df_region)
    carbon_proxy = compute_carbon_proxy(renewable_proxy)
    avg_temp = fetch_temperature(lat, lon)
    records.append({
        'region': region,
        'price': latest_demand,
        'peak_forecast': peak,
        'volatility': volatility,
        'renewable_proxy': renewable_proxy,
        'carbon_proxy': carbon_proxy,
        'avg_temp': avg_temp,
        'lat': lat,
        'lon': lon,
    })

dc_df = pd.DataFrame(records)
if dc_df.empty:
    dc_df = pd.DataFrame(columns=['region', 'price', 'peak_forecast', 'volatility', 'renewable_proxy', 'carbon_proxy', 'avg_temp', 'lat', 'lon'])
else:
    metrics_to_normalize = {
        'price': 'price_norm',
        'peak_forecast': 'peak_norm',
        'volatility': 'volatility_norm',
        'renewable_proxy': 'renewable_norm',
        'carbon_proxy': 'carbon_norm',
        'avg_temp': 'temp_norm',
    }
    for metric, norm_col in metrics_to_normalize.items():
        col = dc_df[metric]
        col_min, col_max = col.min(), col.max()
        if np.isfinite(col_min) and np.isfinite(col_max) and col_max != col_min:
            dc_df[norm_col] = (col - col_min) / (col_max - col_min)
        else:
            dc_df[norm_col] = 0.0
    dc_df['profitability'] = (
        0.40 * (1 - dc_df['price_norm']) +
        0.30 * (1 - dc_df['peak_norm']) +
        0.30 * (1 - dc_df['volatility_norm'])
    )
    dc_df['sustainability'] = (
        0.35 * dc_df['renewable_norm'] +
        0.30 * (1 - dc_df['carbon_norm']) +
        0.20 * (1 - dc_df['temp_norm']) +
        0.15 * dc_df['renewable_norm']
    )
    dc_df['hybrid'] = 0.5 * dc_df['profitability'] + 0.5 * dc_df['sustainability']
    dc_df = dc_df.sort_values('hybrid', ascending=False).reset_index(drop=True)

final_columns = ['region', 'price', 'peak_forecast', 'volatility', 'renewable_proxy', 'carbon_proxy', 'avg_temp', 'profitability', 'sustainability', 'hybrid', 'lat', 'lon']
for col in final_columns:
    if col not in dc_df.columns:
        dc_df[col] = np.nan
dc_df_final = dc_df[final_columns]
dc_df_final.to_csv('datacenter_scores.csv', index=False)
dc_df_final

  end = datetime.utcnow()
  full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
  full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
01:24:02 - cmdstanpy - INFO - Chain [1] start processing
01:24:02 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  comp = np.matmul(X, beta_c.transpose())
  comp = np.matmul(X, beta_c.transpose())
  comp = np.matmul(X, beta_c.transpose())
  Xb_a = np.matmul(seasonal_features.values,
  Xb_a = np.matmul(seasonal_features.values,
  Xb_a = np.matmul(seasonal_features.values,
  Xb_m = np.matmul(seasonal_features.values, beta * s_m.values)
  Xb_m = np.matmul(seasonal_features.values, beta * s_m.values)
  Xb_m = np.matmul(seasonal_features.values, beta * s_m.values)
  full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
  end = datetime.utcnow()
  full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
  full_range = pd.date_range(df.index.min(), df.index.max(), freq='H')
01

Unnamed: 0,region,price,peak_forecast,volatility,renewable_proxy,carbon_proxy,avg_temp,profitability,sustainability,hybrid,lat,lon
0,NE,15303.0,14838.390025,1231.775153,8.5e-05,0.999915,10.410448,0.984707,0.974768,0.979737,42.5,-72.5
1,SW,13983.0,16300.739885,1192.952044,7.6e-05,0.999924,14.537313,0.988535,0.820931,0.904733,36.0,-111.5
2,NY,19082.0,18640.660856,1687.381197,6.5e-05,0.999935,8.664179,0.920411,0.779213,0.849812,42.9,-75.3
3,TEN,18220.0,20367.396969,1106.198933,5.8e-05,0.999942,16.162687,0.960063,0.594522,0.777292,36.0,-86.0
4,CAR,22636.0,27588.140633,2320.782382,4.3e-05,0.999957,16.032836,0.825998,0.438923,0.632461,35.5,-80.0
5,SE,25652.0,29348.207154,1608.485143,4e-05,0.99996,17.538806,0.856509,0.387777,0.622143,33.0,-84.0
6,CENT,32566.0,35618.09634,1701.323296,3.1e-05,0.999969,16.201493,0.794526,0.308256,0.551391,38.5,-94.5
7,FLA,28057.0,38420.000586,2439.566568,3.4e-05,0.999966,22.50597,0.752918,0.240808,0.496863,28.0,-82.0
8,NW,41302.0,43708.034216,3208.017237,2.6e-05,0.999974,11.059701,0.615835,0.319089,0.467462,45.5,-120.5
9,CAL,32137.0,35510.001238,3912.770329,3.3e-05,0.999967,18.529851,0.638147,0.287519,0.462833,36.5,-119.5


## Visualize Scores


In [6]:
if not dc_df_final.empty:
    melted = dc_df_final.melt(id_vars=['region'], value_vars=['profitability', 'sustainability', 'hybrid'], var_name='metric', value_name='score')
    fig = px.bar(melted, x='region', y='score', color='metric', barmode='group', title='Datacenter Score Components by Region')
    fig.show()
else:
    print('No data available for bar chart visualization.')


In [7]:
dc_df_final

Unnamed: 0,region,price,peak_forecast,volatility,renewable_proxy,carbon_proxy,avg_temp,profitability,sustainability,hybrid,lat,lon
0,NE,15303.0,14838.390025,1231.775153,8.5e-05,0.999915,10.410448,0.984707,0.974768,0.979737,42.5,-72.5
1,SW,13983.0,16300.739885,1192.952044,7.6e-05,0.999924,14.537313,0.988535,0.820931,0.904733,36.0,-111.5
2,NY,19082.0,18640.660856,1687.381197,6.5e-05,0.999935,8.664179,0.920411,0.779213,0.849812,42.9,-75.3
3,TEN,18220.0,20367.396969,1106.198933,5.8e-05,0.999942,16.162687,0.960063,0.594522,0.777292,36.0,-86.0
4,CAR,22636.0,27588.140633,2320.782382,4.3e-05,0.999957,16.032836,0.825998,0.438923,0.632461,35.5,-80.0
5,SE,25652.0,29348.207154,1608.485143,4e-05,0.99996,17.538806,0.856509,0.387777,0.622143,33.0,-84.0
6,CENT,32566.0,35618.09634,1701.323296,3.1e-05,0.999969,16.201493,0.794526,0.308256,0.551391,38.5,-94.5
7,FLA,28057.0,38420.000586,2439.566568,3.4e-05,0.999966,22.50597,0.752918,0.240808,0.496863,28.0,-82.0
8,NW,41302.0,43708.034216,3208.017237,2.6e-05,0.999974,11.059701,0.615835,0.319089,0.467462,45.5,-120.5
9,CAL,32137.0,35510.001238,3912.770329,3.3e-05,0.999967,18.529851,0.638147,0.287519,0.462833,36.5,-119.5


In [9]:
dc_df_final

Unnamed: 0,region,price,peak_forecast,volatility,renewable_proxy,carbon_proxy,avg_temp,profitability,sustainability,hybrid,lat,lon
0,NE,15303.0,14838.390025,1231.775153,8.5e-05,0.999915,10.410448,0.984707,0.974768,0.979737,42.5,-72.5
1,SW,13983.0,16300.739885,1192.952044,7.6e-05,0.999924,14.537313,0.988535,0.820931,0.904733,36.0,-111.5
2,NY,19082.0,18640.660856,1687.381197,6.5e-05,0.999935,8.664179,0.920411,0.779213,0.849812,42.9,-75.3
3,TEN,18220.0,20367.396969,1106.198933,5.8e-05,0.999942,16.162687,0.960063,0.594522,0.777292,36.0,-86.0
4,CAR,22636.0,27588.140633,2320.782382,4.3e-05,0.999957,16.032836,0.825998,0.438923,0.632461,35.5,-80.0
5,SE,25652.0,29348.207154,1608.485143,4e-05,0.99996,17.538806,0.856509,0.387777,0.622143,33.0,-84.0
6,CENT,32566.0,35618.09634,1701.323296,3.1e-05,0.999969,16.201493,0.794526,0.308256,0.551391,38.5,-94.5
7,FLA,28057.0,38420.000586,2439.566568,3.4e-05,0.999966,22.50597,0.752918,0.240808,0.496863,28.0,-82.0
8,NW,41302.0,43708.034216,3208.017237,2.6e-05,0.999974,11.059701,0.615835,0.319089,0.467462,45.5,-120.5
9,CAL,32137.0,35510.001238,3912.770329,3.3e-05,0.999967,18.529851,0.638147,0.287519,0.462833,36.5,-119.5


In [17]:

fig_map = px.scatter_geo(
    dc_df_final,
    lat='lat',
    lon='lon',
    size='hybrid',
    color='hybrid',
    hover_name='region',
    projection='albers usa',
    title='Hybrid Datacenter Score Across U.S. Grid Regions'
)
fig_map.show()