In [3]:
import pandas as pd 
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from itertools import permutations, combinations
import matplotlib.pyplot as plt
import requests
from pathlib import Path

# USA Balancing Authority Interchange Capacity
This notebook estimates transmission capacity between balancing authorities in the USA.

## Helper Functions 

In [4]:
def download_file(url: str, destination: str, timeout: float = 60):
    """Download ``url`` and write the response body to ``destination``.

    The outcome is reported with ``print``; nothing is returned and no
    exception is raised for HTTP or network failures.

    Parameters
    ----------
    url : str
        Address of the file to fetch.
    destination : str
        Local path the response body is written to (binary mode).
    timeout : float, default 60
        Seconds passed to ``requests.get`` as its ``timeout``. Without it a
        stalled connection would block the notebook indefinitely.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Keep the best-effort behaviour: report and move on.
        print(f"Failed to download file. Error: {error}")
        return

    if response.status_code == 200:
        with open(destination, "wb") as file:
            file.write(response.content)
        print(f"File downloaded successfully to {destination}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")

In [5]:
def file_exists(file_path: str):
    """Return True when ``file_path`` points at an existing regular file."""
    return Path(file_path).is_file()

In [6]:
def create_directory(directory: str):
    """Create ``directory`` (including missing parents) if it is not there yet.

    Prints a confirmation only when the directory was actually created.
    """
    target = Path(directory)
    if target.exists():
        return
    target.mkdir(parents=True)
    print(f"Directory '{target}' created")

## Extract flow data from EIA
Downloaded 6-month files on interchanges for 2019-2023 from [EIA930](https://www.eia.gov/electricity/gridmonitor/dashboard/electric_overview/US48/US48)

In [10]:
# Ad-hoc probe of one EIA six-month file; the response is only displayed.
# A timeout is set so a stalled connection fails instead of hanging the kernel.
requests.get(
    "https://www.eia.gov/electricity/gridmonitor/sixMonthFiles/EIA930_BALANCE_2023_Jul_Dec.csv",
    timeout=60,
)


KeyboardInterrupt: 

In [7]:
base_url = "https://www.eia.gov/electricity/gridmonitor/sixMonthFiles/EIA930_INTERCHANGE"

# One entry per half-year file, e.g. "2019_Jan_Jun", for 2019 through 2023.
files = []
for year in range(2019, 2024):
    for half in ("Jan_Jun", "Jul_Dec"):
        files.append(f"{year}_{half}")

create_directory("flows")

# Only fetch files that are not already present locally.
for name in files:
    local_path = f"flows/{name}.csv"
    if file_exists(local_path):
        continue
    download_file(f"{base_url}_{name}.csv", local_path)

Directory 'flows' created


KeyboardInterrupt: 

## Import Data

In [None]:
# Every column is read as text except the hour counter.
dtypes = {
    column: str
    for column in [
        "Balancing Authority",
        "Data Date",
        "Directly Interconnected Balancing Authority",
        "Interchange (MW)",
        "Local Time at End of Hour",
        "UTC Time at End of Hour",
        "Region",
        "DIBA_Region",
    ]
}
dtypes["Hour Number"] = int


def _read_interchange_csv(name):
    """Read one file from ``flows/`` and convert the interchange column to float."""
    frame = pd.read_csv(f"flows/{name}.csv", dtype=dtypes)
    # Values are read as text; strip "," (thousands separators) before casting.
    frame["Interchange (MW)"] = (
        frame["Interchange (MW)"].astype(str).str.replace(",", "").astype(float)
    )
    return frame


dfs = [_read_interchange_csv(name) for name in files]
df = pd.concat(dfs).reset_index(drop=True)
df.head()

### Drop Retired BAs 
Manually taken from the list of reporting entities provided by the EIA [here](https://www.eia.gov/electricity/gridmonitor/about) 

In [None]:
retired = ["AEC", "EEI", "GLHB", "GRMA", "NSB", "OVEC", "WAUE", "CFE"]

ba_col = df["Balancing Authority"]
diba_col = df["Directly Interconnected Balancing Authority"]

# Keep rows where neither side is retired and neither side is missing.
# The previous `.dropna()` acted on the boolean `isin` mask, which never holds
# NaN, so it removed nothing; missing authorities are now filtered explicitly
# because the later path-label handling calls `.split(">")` on every label.
df = df[
    ba_col.notna()
    & diba_col.notna()
    & ~ba_col.isin(retired)
    & ~diba_col.isin(retired)
]

## Format Data
Get timeseries of formatted flows to/from each region

In [None]:
# Label each end of an interchange as "<region>-<balancing authority>".
tran = df.assign(
    From_raw=df["Region"].str.cat(df["Balancing Authority"], sep="-"),
    To_raw=df["DIBA_Region"].str.cat(
        df["Directly Interconnected Balancing Authority"], sep="-"
    ),
)
tran.head()

In [None]:
# Correct for the negative flows.
# The data can be +/- for MISO->ASCI and ASCI->MISO.
# Make every flow positive by swapping the From/To labels of negative rows.
# Vectorised with Series.where instead of a row-wise apply; rows whose
# interchange is not >= 0 (negative or missing) are swapped, as before.
is_forward = tran["Interchange (MW)"] >= 0
tran["From"] = tran["From_raw"].where(is_forward, tran["To_raw"])
tran["To"] = tran["To_raw"].where(is_forward, tran["From_raw"])
tran["Capacity (MW)"] = tran["Interchange (MW)"].abs()
tran.head()

In [None]:
# Index by the parsed local end-of-hour timestamp and keep only the flow columns.
keep_cols = ["To", "From", "Capacity (MW)"]
end_of_hour = pd.to_datetime(tran["Local Time at End of Hour"])
tran = tran.assign(time=end_of_hour).set_index("time")[keep_cols]
tran.head()

## Extract Max Flow Rates
Get max flow values to/from each region

In [None]:
# Add both orderings of the path label, joined with ">".
flows = tran.assign(
    **{
        "To-From": tran["To"].str.cat(tran["From"], sep=">"),
        "From-To": tran["From"].str.cat(tran["To"], sep=">"),
    }
)
flows.head()

### Remove outliers
Clip anything outside the 5th–95th percentile range.

In [None]:
# https://stackoverflow.com/a/43093390
def _clipped_max_by_path(frame, path_col, value_col="Capacity (MW)", lower=0.05, upper=0.95):
    """Return ``{path label: max value strictly inside the quantile band}``.

    For every distinct label in ``path_col`` the ``lower`` and ``upper``
    quantiles of ``value_col`` are computed, values outside the open interval
    (q_low, q_hi) are discarded, and the maximum of what remains is stored.
    A label with no remaining values is stored as 0.

    A single groupby pass replaces re-filtering the whole frame once per label.
    """
    clipped_max = {}
    for path, values in frame.groupby(path_col, sort=False)[value_col]:
        q_low = values.quantile(lower)
        q_hi = values.quantile(upper)
        inside = values[(values < q_hi) & (values > q_low)]
        clipped_max[path] = 0 if inside.empty else inside.max()
    return clipped_max


# Both label columns share one dictionary. "From-To" is written second, so it
# overwrites any "To-From" entry with the same label (same order as before).
# NOTE(review): confirm this overwrite is the intended semantics.
max_flow = _clipped_max_by_path(flows, "To-From")
max_flow.update(_clipped_max_by_path(flows, "From-To"))

In [None]:
# Relative difference below which the two directions are treated as symmetric.
SYMMETRY_TOLERANCE = 0.15

ba_paths = set(flows["To-From"].unique()).union(flows["From-To"].unique())

capacity = []
for ba_path in ba_paths:
    ba_1, ba_2 = ba_path.split(">")[:2]
    # Labels without an entry in max_flow count as zero.
    max_flow_to_from = max_flow.get(f"{ba_1}>{ba_2}", 0)
    max_flow_from_to = max_flow.get(f"{ba_2}>{ba_1}", 0)

    larger = max(max_flow_to_from, max_flow_from_to)
    smaller = min(max_flow_to_from, max_flow_from_to)

    # Relative gap between the two directions. When both are zero the gap is
    # defined as 0 instead of dividing by zero (max_flow stores the integer 0
    # for empty groups, which previously raised ZeroDivisionError here).
    diff = (larger - smaller) / larger if larger > 0 else 0

    if diff < SYMMETRY_TOLERANCE:
        # Close enough: use the larger value in both directions.
        capacity.append([ba_1, ba_2, larger, larger * (-1)])
    else:
        capacity.append([ba_1, ba_2, max_flow_to_from, max_flow_from_to * (-1)])

In [None]:
# Tabulate the per-path capacities collected in `capacity`.
capacity_columns = ["BA_From", "BA_To", "Cap (MW) +", "Cap (MW) -"]
df_max_flow = pd.DataFrame(data=capacity, columns=capacity_columns)
df_max_flow.head()

## Format data for database

In [None]:
# Lookup table read from a CSV next to the notebook.
mapper_csv = "./ba-mapper.csv"
region_ba_map = pd.read_csv(mapper_csv)
region_ba_map.head()

In [None]:
# Dictionary from each "region-ba" value to its "code" value.
region_2_code = region_ba_map.set_index("region-ba")["code"].to_dict()

In [None]:
# Replace both endpoint labels with their codes from region_2_code.
df_max_flow_formatted = df_max_flow.assign(
    BA_From=df_max_flow["BA_From"].map(region_2_code),
    BA_To=df_max_flow["BA_To"].map(region_2_code),
)
df_max_flow_formatted.head()

In [None]:
def _ordered_record(ba_from, ba_to, cap_pos, cap_neg):
    """Return one output row with the two codes in sorted order.

    When the codes are already in sorted order the row is passed through.
    Otherwise the endpoints are swapped and the two capacities trade places
    with their signs flipped.
    """
    first = sorted([ba_from, ba_to])[0]
    if ba_from == first:
        return [f"TRN{ba_from}{ba_to}", ba_from, ba_to, cap_pos, cap_neg]
    return [f"TRN{ba_to}{ba_from}", ba_to, ba_from, cap_neg * (-1), cap_pos * (-1)]


data = [
    _ordered_record(ba_from, ba_to, cap_pos, cap_neg)
    for ba_from, ba_to, cap_pos, cap_neg in zip(
        df_max_flow_formatted["BA_From"],
        df_max_flow_formatted["BA_To"],
        df_max_flow_formatted["Cap (MW) +"],
        df_max_flow_formatted["Cap (MW) -"],
    )
]

In [None]:
# Deduplicate the rows, order them by technology name, and write the CSV.
final_columns = ["TECHNOLOGY", "From", "To", "Cap (MW) +", "Cap (MW) -"]
final = (
    pd.DataFrame(data, columns=final_columns)
    .drop_duplicates()
    .sort_values(by=["TECHNOLOGY"])
)
final.to_csv("Transmission-Capacity.csv", index=False)
final