# Hosting Capacity Analysis with Power Flow

From the [Sisyphean Gridworks ML Playground](https://sgridworks.com/ml-playground/guides/03-hosting-capacity.html)

## Setup

Clone the repository and install dependencies. Run this cell first.

In [None]:
# Fetch the demo repository. stderr is discarded, and any clone failure
# (not only an existing checkout) prints 'Already cloned' instead.
!git clone https://github.com/SGridworks/Dynamic-Network-Model.git 2>/dev/null || echo 'Already cloned'
# Make the repository the working directory for the remaining cells
# (presumably so the `demo_data` package import below resolves — confirm).
%cd Dynamic-Network-Model
# Quietly install the third-party packages listed for this guide.
!pip install -q pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm pyarrow

## Load the Network Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load SP&L datasets using the data loader API
from demo_data.load_demo_data import (
    load_network_nodes, load_network_edges, load_transformers,
    load_solar_installations, load_load_profiles
)

# Pull each SP&L table into memory through the demo data loader API.
nodes, edges, transformers, solar, load_profiles = (
    load_network_nodes(),
    load_network_edges(),
    load_transformers(),
    load_solar_installations(),
    load_load_profiles(),
)

# Report the row count of each table (load profiles are not summarised here).
for label, table in (
    ("Network nodes:        ", nodes),
    ("Network edges:        ", edges),
    ("Transformers:         ", transformers),
    ("Solar installations:  ", solar),
):
    print(f"{label}{len(table):,}")

## Calculate Baseline Capacity

First, build a feeder-level summary that combines peak load, transformer capacity, and existing solar penetration. This gives us the baseline for hosting capacity screening.

In [None]:
# --- Per-feeder aggregates -------------------------------------------------

# Installed solar on each feeder: sum of every installation's capacity.
solar_by_feeder = (
    solar.groupby("feeder_id")["capacity_kw"]
    .sum()
    .rename("existing_solar_kw")
    .reset_index()
)

# Peak demand on each feeder: the largest load value seen in the profiles,
# kept in MW and also converted to kW.
peak_load = (
    load_profiles.groupby("feeder_id")["load_mw"]
    .max()
    .rename("peak_load_mw")
    .reset_index()
)
peak_load = peak_load.assign(peak_load_kw=lambda df: df["peak_load_mw"] * 1000)

# Transformer capacity on each feeder: sum of the kVA ratings.
xfmr_capacity = (
    transformers.groupby("feeder_id")["kva_rating"]
    .sum()
    .rename("total_xfmr_kva")
    .reset_index()
)

# --- Feeder-level summary --------------------------------------------------

# Start from the feeders that have load data and left-join the other two
# aggregates, so feeders missing from either table are kept.
feeder_hc = peak_load.merge(xfmr_capacity, on="feeder_id", how="left").merge(
    solar_by_feeder, on="feeder_id", how="left"
)

# A feeder with no solar rows has no match in the join; count that as 0 kW.
feeder_hc["existing_solar_kw"] = feeder_hc["existing_solar_kw"].fillna(0)

print(feeder_hc.head(10))

## Choose a Feeder to Study

In [None]:
# Rank feeders by peak load and take the one in the middle of the ranking
# as a representative, mid-sized study case.
ranked_by_peak = feeder_hc.sort_values("peak_load_kw")
middle_position = len(feeder_hc) // 2
feeder_id = ranked_by_peak.iloc[middle_position]["feeder_id"]

# Look the feeder's summary row back up by its identifier.
is_study_feeder = feeder_hc["feeder_id"] == feeder_id
feeder_row = feeder_hc.loc[is_study_feeder].iloc[0]

print(f"Studying feeder: {feeder_id}")
print(f"  Peak load:        {feeder_row['peak_load_kw']:.0f} kW")
print(f"  Transformer cap:  {feeder_row['total_xfmr_kva']:.0f} kVA")
print(f"  Existing solar:   {feeder_row['existing_solar_kw']:.0f} kW")

# Scatter the feeder's nodes by their coordinates to show its footprint.
feeder_nodes = nodes[nodes["feeder_id"] == feeder_id]

_, node_ax = plt.subplots(figsize=(8, 8))
node_ax.scatter(
    feeder_nodes["longitude"],
    feeder_nodes["latitude"],
    c="#5FCCDB",
    s=30,
)
node_ax.set_title(f"Feeder {feeder_id} Node Locations")
node_ax.set_xlabel("Longitude")
node_ax.set_ylabel("Latitude")
plt.tight_layout()
plt.show()

## Incrementally Add Solar

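This is the core of hosting capacity analysis. We incrementally add solar generation to the feeder and check whether thermal or voltage limits are exceeded at each step. To keep the example lightweight, both checks below use simplified approximations rather than a full power-flow solution.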

In [None]:
# Simplified hosting-capacity sweep for the selected feeder.
# For each candidate amount of added PV, two rough indicators are computed:
#   * thermal loading: magnitude of the net feeder flow as a percentage of
#     the total transformer rating, and
#   * voltage: a linear rise of about 1% per 100 kW of solar in excess of
#     peak load, on top of a 1.0 p.u. base.

pv_steps = range(0, 5050, 50)  # added PV levels: 0 to 5,000 kW in 50 kW steps

capacity_kw = feeder_row["total_xfmr_kva"]  # kVA ~ kW at unity PF
peak_kw = feeder_row["peak_load_kw"]
existing_solar = feeder_row["existing_solar_kw"]


def _screen_pv_level(added_kw):
    """Return the voltage and thermal indicators for one added-PV level."""
    solar_total = existing_solar + added_kw

    # Net flow is peak load minus all solar; a negative value is reverse flow.
    net_kw = peak_kw - solar_total
    loading_pct = abs(min(net_kw, peak_kw)) / capacity_kw * 100

    # Only solar beyond the peak load contributes to the voltage rise.
    surplus_kw = max(0, (solar_total - peak_kw))
    rise_pu = surplus_kw / 100 * 0.01

    return {
        "pv_kw": added_kw,
        "voltage_pu": 1.0 + rise_pu,
        "thermal_loading_pct": loading_pct,
    }


results = [_screen_pv_level(step_kw) for step_kw in pv_steps]

hc = pd.DataFrame(results)
print(hc.head(10))

## Find the Hosting Capacity

In [None]:
# Hosting capacity is the largest amount of added PV that still respects a
# limit, i.e. the last tested level *before* the first violation -- not the
# violating level itself.

voltage_limit = 1.05  # upper voltage limit, p.u.
thermal_limit = 100   # thermal loading limit, percent

# Rows of the sweep that break each limit (in ascending PV order, as in `hc`).
voltage_violation = hc[hc["voltage_pu"] > voltage_limit]
thermal_violation = hc[hc["thermal_loading_pct"] > thermal_limit]

# Highest PV level covered by the sweep, used in the "no violation" messages.
max_tested_kw = hc["pv_kw"].max()


def _last_safe_pv_kw(violations):
    """Return the highest tested PV level below the first violating level.

    Returns 0 when the very first tested level already violates the limit,
    so the reported hosting capacity is never negative.
    """
    first_bad_kw = violations.iloc[0]["pv_kw"]
    safe_levels = hc.loc[hc["pv_kw"] < first_bad_kw, "pv_kw"]
    return safe_levels.max() if not safe_levels.empty else 0


if not voltage_violation.empty:
    hc_voltage = _last_safe_pv_kw(voltage_violation)
    print(f"Voltage-limited hosting capacity: {hc_voltage:.0f} kW")
else:
    print(f"No voltage violation up to {max_tested_kw:,.0f} kW")

if not thermal_violation.empty:
    hc_thermal = _last_safe_pv_kw(thermal_violation)
    print(f"Thermal-limited hosting capacity: {hc_thermal:.0f} kW")
else:
    print(f"No thermal violation up to {max_tested_kw:,.0f} kW")

## Plot the Hosting Capacity Curve

In [None]:
# Two stacked panels sharing the added-PV axis: voltage on top, thermal
# loading underneath, each with its limit drawn as a dashed reference line.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# Voltage plot
ax1.plot(hc["pv_kw"], hc["voltage_pu"], color="#5FCCDB", linewidth=2)
ax1.axhline(y=1.05, color="red", linestyle="--", label="ANSI Upper Limit (1.05)")
ax1.axhline(y=0.95, color="orange", linestyle="--", label="ANSI Lower Limit (0.95)")
ax1.set_ylabel("Voltage (p.u.)")
ax1.set_title(f"Hosting Capacity Analysis — Feeder {feeder_id}")
ax1.legend()
ax1.grid(True, alpha=0.3)

# Thermal loading plot. The plotted percentage is net feeder flow relative to
# total transformer capacity, so the axis is labelled as transformer loading
# (not line loading).
ax2.plot(hc["pv_kw"], hc["thermal_loading_pct"], color="#2D6A7A", linewidth=2)
ax2.axhline(y=100, color="red", linestyle="--", label="Thermal Limit (100%)")
ax2.set_ylabel("Transformer Loading (%)")
ax2.set_xlabel("Added PV Generation (kW)")
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Test Multiple Feeders

Different feeders will have different hosting capacities depending on their transformer headroom, existing solar penetration, and peak load. Let's screen several feeders at once.

In [None]:
# Run the simplified screening for a reproducible random sample of feeders
# (at most 10; the fixed random_state makes the sample repeatable).
sample_feeders = feeder_hc.sample(min(10, len(feeder_hc)), random_state=42)
hosting_caps = {}

# Sweep definition: added PV from 0 to max_pv_kw in pv_step_kw increments.
pv_step_kw = 50
max_pv_kw = 5000

for _, row in sample_feeders.iterrows():
    fid = row["feeder_id"]
    cap = row["total_xfmr_kva"]  # kVA treated as kW (unity power factor)
    peak = row["peak_load_kw"]
    existing = row["existing_solar_kw"]

    for pv_kw in range(0, max_pv_kw + pv_step_kw, pv_step_kw):
        total_solar = existing + pv_kw
        # Thermal: magnitude of net flow as a percentage of transformer rating.
        thermal_pct = abs(min(peak - total_solar, peak)) / cap * 100
        # Voltage: ~1% rise per 100 kW of solar in excess of peak load.
        voltage_pu = 1.0 + max(0, (total_solar - peak)) / 100 * 0.01

        if voltage_pu > 1.05 or thermal_pct > 100:
            # Record the last safe level. If the feeder already violates a
            # limit with no added PV, the capacity is 0 kW rather than a
            # meaningless negative value.
            hosting_caps[fid] = max(pv_kw - pv_step_kw, 0)
            break
    else:
        # The sweep finished without a violation; capacity is at least the
        # largest level tested.
        hosting_caps[fid] = max_pv_kw

# Display results
hc_df = pd.DataFrame(list(hosting_caps.items()),
                      columns=["Feeder", "Hosting Capacity (kW)"])
print(hc_df)

# Bar chart
hc_df.plot(x="Feeder", y="Hosting Capacity (kW)", kind="bar",
           color="#5FCCDB", legend=False)
plt.title("Hosting Capacity by Feeder (Simplified Screening)")
plt.ylabel("Hosting Capacity (kW)")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

## What You Built and Next Steps

You just performed a simplified hosting capacity screening on the SP&L distribution network. Here's what you accomplished: