In [44]:
import json

import pandas as pd
import polars as pl
from util import mock_snakemake

# Build a mocked `snakemake` object when none exists in the notebook's globals, or
# when the existing one has a `mock` attribute (presumably marking an object created
# by a previous run of this cell, so it gets rebuilt).
# `or` short-circuits, so `snakemake` is only dereferenced once it is known to exist;
# the noqa silences the linter's undefined-name warning for that reference.
# NOTE(review): "process_line_data" is presumably the Snakemake rule being mocked —
# confirm against util.mock_snakemake.
if "snakemake" not in globals() or hasattr(snakemake, "mock"):  # noqa: F821
    snakemake = mock_snakemake("process_line_data")

In [45]:
# Read the GeoJSON input. The context manager closes the file handle as soon as
# parsing finishes (the previous bare `open()` left it to the garbage collector),
# and the encoding is pinned to UTF-8, which the GeoJSON spec (RFC 7946) mandates,
# instead of depending on the platform's locale default.
with open(snakemake.input[0], encoding="utf-8") as geojson_file:
    geojson = json.load(geojson_file)

# One row per entry of the top-level "features" list; nested keys are flattened into
# dotted column names such as "geometry.type" and "properties.f_bus".
df = pd.json_normalize(geojson, record_path=["features"])
df.head()

Unnamed: 0,id,type,geometry.coordinates,geometry.type,properties.br_r,properties.rate_a,properties.Comments,properties.br_b,properties.Lon2,properties.Lon1,...,properties.Lat1,properties.f_bus,properties.t_bus,properties.CATS_ID,properties.Lat2,properties.transformer,properties.Structure_Type,properties.Type,properties.Circuit,properties.Structure_Material
0,0,Feature,"[[-122.24291409889523, 37.77242586670423], [-1...",LineString,4.8e-05,1.75,,2.7e-05,-122.243143,-122.242914,...,37.772426,1,2410,1,37.772041,False,,,,
1,1,Feature,"[[-122.24314284220173, 37.77204138338957], [-1...",LineString,0.000914,4.37,,0.002321,-122.214479,-122.243143,...,37.772041,2410,20,2,37.763002,False,,,,
2,2,Feature,"[[-122.24314284220173, 37.77204138338957], [-1...",LineString,0.00354,1.85,,0.002469,-122.291454,-122.243143,...,37.772041,2410,2411,3,37.779166,False,,,,
3,3,Feature,"[[-122.29145408356094, 37.779165922885475], [-...",LineString,0.000156,1.42,,7.1e-05,-122.292698,-122.291454,...,37.779166,2411,1618,4,37.779322,False,,,,
4,4,Feature,"[[-122.29145408356094, 37.779165922885475], [-...",LineString,0.000648,2.19,,0.000537,-122.289818,-122.291454,...,37.779166,2411,1617,5,37.788672,False,,,,


In [None]:
# Code to explore the MultiLineString geometry type. They appear to be just like the normal LineString types.
# geojson["features"] = [
#     f for f in geojson["features"] if f["geometry"]["type"] != "LineString"
# ]
# json.dump(geojson, open(snakemake.output.special_features, "w"))

In [38]:
# Keep only the feature attributes (columns under the "properties." namespace) and
# strip that namespace from their names; id/type/geometry columns are discarded.
property_columns = [name for name in df.columns if name.startswith("properties.")]
df2 = df[property_columns].rename(
    columns=lambda name: name.replace("properties.", "")
)
df2.head()

Unnamed: 0,br_r,rate_a,Comments,br_b,Lon2,Lon1,br_x,kV,Lat1,f_bus,t_bus,CATS_ID,Lat2,transformer,Structure_Type,Type,Circuit,Structure_Material
0,4.8e-05,1.75,,2.7e-05,-122.243143,-122.242914,0.000219,115.0,37.772426,1,2410,1,37.772041,False,,,,
1,0.000914,4.37,,0.002321,-122.214479,-122.243143,0.007485,115.0,37.772041,2410,20,2,37.763002,False,,,,
2,0.00354,1.85,,0.002469,-122.291454,-122.243143,0.016138,115.0,37.772041,2410,2411,3,37.779166,False,,,,
3,0.000156,1.42,,7.1e-05,-122.292698,-122.291454,0.000469,115.0,37.779166,2411,1618,4,37.779322,False,,,,
4,0.000648,2.19,,0.000537,-122.289818,-122.291454,0.004104,115.0,37.779166,2411,1617,5,37.788672,False,,,,


In [39]:
# See description of columns here: https://matpower.org/docs/ref/matpower5.0/caseformat.html
# Maps the source column names to the names used downstream. Only the columns
# listed here are exported; the commented-out entries are currently not carried over.
col_name_map = {
    # "CATS_ID": "id",
    "f_bus": "from_bus",
    "t_bus": "to_bus",
    # "kV": "voltage_kv",
    "rate_a": "line_rating",
    # "br_r": "resistance",
    "br_x": "reactance",
}

# Rename, then keep just the renamed columns (in the mapping's order). The pandas
# intermediate gets its own name so `df3` is only ever bound to a polars frame, and
# the selection uses a real list rather than a dict view as the indexer.
selected_columns = list(col_name_map.values())
lines_pd = df2.rename(columns=col_name_map)[selected_columns]
df3: pl.DataFrame = pl.from_pandas(lines_pd)
df3.head()

from_bus,to_bus,line_rating,reactance
i64,i64,f64,f64
1,2410,1.75,0.000219
2410,20,4.37,0.007485
2410,2411,1.85,0.016138
2411,1618,1.42,0.000469
2411,1617,2.19,0.004104


In [40]:
# Sanity check: collect any rows whose two endpoints are the same bus; there must be none.
self_loops = df3.filter(pl.col("from_bus") == pl.col("to_bus"))
assert self_loops.height == 0, "Lines must not connect a bus to itself"

In [42]:
# Put every line's endpoints in a canonical order: the smaller bus number ends up in
# "from_bus" and the larger in "to_bus". Both replacement columns are built from the
# same comparison on the original columns, so rows that are out of order get swapped.
bus_from = pl.col("from_bus")
bus_to = pl.col("to_bus")
already_ordered = bus_from < bus_to

df4 = df3.with_columns(
    pl.when(already_ordered).then(bus_from).otherwise(bus_to).alias("from_bus"),
    pl.when(already_ordered).then(bus_to).otherwise(bus_from).alias("to_bus"),
)
df4

from_bus,to_bus,line_rating,reactance
i64,i64,f64,f64
1,2410,1.75,0.000219
20,2410,4.37,0.007485
2410,2411,1.85,0.016138
1618,2411,1.42,0.000469
1617,2411,2.19,0.004104
…,…,…,…
8724,8726,3.82,0.028531
7686,7687,0.469016,0.464432
7876,7877,0.2,0.544565
8692,8693,0.836464,0.130255


In [43]:
# Persist the processed line table as Parquet at the first output path given by snakemake.
output_path = snakemake.output[0]
df4.write_parquet(output_path)