In [1]:
import pandas as pd
from pathlib import Path
import re

# --- CONFIG
# Input CSVs and the destination of the generated feeder trace.
INPUT_HT = "/media/sagarkumar/New Volume/SAGAR/2-year-data/HTCABLE.csv"
INPUT_ENERGY = "/media/sagarkumar/New Volume/SAGAR/2-year-data/ENERGYAUDIT.csv"
OUTPUT_PATH = "/media/sagarkumar/New Volume/SAGAR/DATA_GENERATION/feeder_trace_22_33kv.csv"

# Column names read from the HT cable frame.
FEEDER_ID_COL = "FEEDERID"
SRC_SWITCH_COL = "SOURCE_SWITCH_ID"
DST_SWITCH_COL = "DESTINATION_SWITCH_ID"
SRC_LOC_COL = "SOURCE_SSFL"
DST_LOC_COL = "DESTINATION_SSFL"

# Column names read from the energy-audit frame.
FUNC_LOC_COL = "FUNC_LOC"
DATE_COL = "SYSTEM_DATE"
LOAD_COL = "MD_KVA"

# --- VALIDATE INPUTS
# Check both inputs before anything is read, so a missing file is reported
# immediately (and all missing files at once) instead of after HTCABLE.csv
# has already been loaded and processed.
_missing_inputs = [p for p in (INPUT_HT, INPUT_ENERGY) if not Path(p).is_file()]
if _missing_inputs:
    raise FileNotFoundError(
        "Input file(s) not found: " + ", ".join(_missing_inputs)
    )

# --- LOAD HTCABLE.csv
ht = pd.read_csv(INPUT_HT, low_memory=False)

# Function to extract feeder token for 22kV/33kV only (case-insensitive)
def feeder_token_22_33(val):
    """Return the feeder token of a 22 kV / 33 kV feeder ID, else ``None``.

    The ID is upper-cased and split on ``_``. When the second field is
    ``22KV`` or ``33KV``, the third field is returned as the feeder token.

    Args:
        val: Raw feeder ID. Non-string values are converted with ``str()``;
            ``None`` is treated as an empty string.

    Returns:
        The upper-cased, whitespace-trimmed third field, or ``None`` when
        the ID has fewer than three fields, a different voltage field, or
        an empty token.
    """
    text = "" if val is None else str(val)
    # Trim each field so stray spaces around "_" do not defeat the match.
    fields = [part.strip() for part in text.upper().split("_")]
    if len(fields) >= 3 and fields[1] in {"22KV", "33KV"}:
        # An empty token is not a usable feeder ID; report it as missing
        # rather than returning "".
        return fields[2] or None
    return None

# --- FILTER HT CABLE ROWS
ht["FEEDER_ID"] = ht[FEEDER_ID_COL].apply(feeder_token_22_33)
# .copy() makes the filtered frame independent, so the column assignments
# below do not write to a slice of the unfiltered frame
# (avoids pandas' SettingWithCopyWarning).
ht = ht[ht["FEEDER_ID"].notna()].copy()  # Keep only 22kV/33kV feeders

# Build FROM_TO column as "SRC_SWITCH_ID-DST_SWITCH_ID"
ht["FROM_TO"] = ht[SRC_SWITCH_COL].astype(str) + "-" + ht[DST_SWITCH_COL].astype(str)

# --- LOAD ENERGYAUDIT.csv and aggregate
# Headers are upper-cased before any column is referenced by name. Dates are
# parsed afterwards with to_datetime, so read_csv is not asked to parse
# DATE_COL under a name that may not match the raw header's case.
audit = pd.read_csv(INPUT_ENERGY, low_memory=False)
audit.columns = [c.upper() for c in audit.columns]
# Unparseable dates become NaT instead of raising.
audit[DATE_COL] = pd.to_datetime(audit[DATE_COL], errors="coerce")

# Per functional location: most recent reading date and mean load.
audit = audit[[FUNC_LOC_COL, DATE_COL, LOAD_COL]].dropna(subset=[FUNC_LOC_COL])
agg = (audit.groupby(FUNC_LOC_COL)
           .agg(LATEST_DT_DATE=(DATE_COL, "max"),
                DT_LOAD=(LOAD_COL,  "mean"))
           .reset_index())
agg[FUNC_LOC_COL] = agg[FUNC_LOC_COL].astype(str)

# --- MERGE by DESTINATION_SSFL = FUNC_LOC
# Both join keys are cast to str so they compare as text.
ht[SRC_LOC_COL] = ht[SRC_LOC_COL].astype(str)
ht[DST_LOC_COL] = ht[DST_LOC_COL].astype(str)
# Left join: every HT cable row is kept; rows without audit data get
# missing LATEST_DT_DATE / DT_LOAD values.
merged = ht.merge(agg, how="left", left_on=DST_LOC_COL, right_on=FUNC_LOC_COL)

# --- OUTPUT ONLY NEEDED COLUMNS
final_cols = [
    "FEEDER_ID",
    "FROM_TO",
    SRC_LOC_COL,
    DST_LOC_COL,
    "LATEST_DT_DATE",
    "DT_LOAD",
]
merged = merged[final_cols]

# --- EXPORT
merged.to_csv(OUTPUT_PATH, index=False)
print(f"Saved {len(merged):,} rows → {OUTPUT_PATH}")

if __name__ == "__main__":
    # Preview the result. IPython is optional, so only its absence is
    # handled; any other failure is allowed to surface.
    try:
        from IPython.display import display
    except ImportError:
        print(merged.head())
    else:
        display(merged.head())


FileNotFoundError: [Errno 2] No such file or directory: '/media/sagarkumar/New Volume/SAGAR/2-year-data/ENERGYAUDIT.csv'