In [27]:
# === Cell A: Imports & robust data loading (REPLACE this whole cell) ===
import re
from pathlib import Path

import pandas as pd

def find_root_with_files(start: Path, subpath: str, need: list[str]) -> Path:
    """
    Search 'start' (resolved) and then each of its ancestors for a project root.

    A folder qualifies when '<folder>/<subpath>' is a directory that contains
    every file named in 'need'; the closest qualifying folder is returned.
    If no folder qualifies, the closest folder that merely has the 'subpath'
    directory is returned so the caller can try alternative files there.
    If not even that exists, 'start' is handed back exactly as it was given.
    """
    origin = start.resolve()
    nearest_with_subpath = None
    for folder in (origin, *origin.parents):
        target = folder / subpath
        if not target.is_dir():
            continue
        # Remember only the closest folder that has the sub-directory at all.
        if nearest_with_subpath is None:
            nearest_with_subpath = folder
        if all((target / filename).exists() for filename in need):
            return folder
    return start if nearest_with_subpath is None else nearest_with_subpath

# 1) Locate project root (prefer the folder whose data/processed holds the surplus file)
start = Path.cwd()
NEEDED = ["surplus_kW_10min.csv"]  # primary target for 03
PROJECT_ROOT = find_root_with_files(start, "data/processed", NEEDED)

DATA_PROCESSED = PROJECT_ROOT / "data" / "processed"
print("Project root detected:", PROJECT_ROOT)
print("Looking in:", DATA_PROCESSED)

# 2) Try to load surplus; if missing, compute it as pv - load (when both files exist)
surplus_path = DATA_PROCESSED / "surplus_kW_10min.csv"
pv_path      = DATA_PROCESSED / "pv_kW_10min.csv"
load_path    = DATA_PROCESSED / "load_kW_10min.csv"

if surplus_path.exists():
    surplus = pd.read_csv(surplus_path, index_col=0, parse_dates=True)
    print("Loaded:", surplus_path)
elif pv_path.exists() and load_path.exists():
    pv   = pd.read_csv(pv_path,   index_col=0, parse_dates=True)
    load = pd.read_csv(load_path, index_col=0, parse_dates=True)
    # Inner-join so the subtraction only covers labels present in both tables.
    pv, load = pv.align(load, join="inner")
    surplus = (pv - load).astype(float)
    print("Computed surplus on the fly from pv/load (files missing):")
    print(" -", pv_path)
    print(" -", load_path)
else:
    raise FileNotFoundError(
        "Could not find processed files.\n"
        f"Missing: {surplus_path}\n"
        "Run 01_prepare_data.ipynb first to generate processed CSVs."
    )

# 3) Optional: seller eligibility flags
can_sell = None
sell_flags_path = DATA_PROCESSED / "seller_flags_10min.csv"
if sell_flags_path.exists():
    # Align to the surplus table first; reindexing leaves NaN wherever no flag exists.
    flags_aligned = pd.read_csv(sell_flags_path, index_col=0, parse_dates=True).reindex_like(surplus)
    n_unflagged = int(flags_aligned.isna().to_numpy().sum())
    # NaN is truthy under astype(bool), so mask it out explicitly:
    # an entry without a flag must NOT be treated as an eligible seller.
    can_sell = flags_aligned.notna() & flags_aligned.astype(bool)
    print("Loaded:", sell_flags_path)
    if n_unflagged:
        print(f"Note: {n_unflagged} slot/home entries have no seller flag; treated as not eligible.")

# 4) Basic sanity
assert isinstance(surplus.index, pd.DatetimeIndex) and surplus.index.is_monotonic_increasing, \
    "surplus must have a sorted DatetimeIndex"
assert surplus.columns.size > 0, "surplus has no home columns"
if can_sell is not None:
    assert can_sell.shape == surplus.shape, "seller flags are not aligned with surplus"


Project root detected: E:\VPP
Looking in: E:\VPP\data\processed
Loaded: E:\VPP\data\processed\surplus_kW_10min.csv
Loaded: E:\VPP\data\processed\seller_flags_10min.csv


In [28]:
# === Cell B: Helpers ===

def parse_home(raw: str) -> str:
    """
    Canonicalise a home label typed by the user and validate it.

    Leading/trailing whitespace is stripped, the text is lower-cased and
    spaces are removed. Raises ValueError when the cleaned label does not
    begin with "h" or is not a column of the module-level `surplus` frame;
    otherwise the cleaned label is returned.
    """
    label = raw.strip().lower().replace(" ", "")
    if label[:1] != "h":
        raise ValueError("Home must look like h1, h2, ...")
    known_homes = surplus.columns
    if label in known_homes:
        return label
    raise ValueError(f"Unknown home '{label}'. Available: {list(known_homes)}")

def _normalized_naive_index(idx: pd.DatetimeIndex) -> pd.DatetimeIndex:
    """
    Return `idx` as tz-naive calendar days (each timestamp floored to midnight).

    For a tz-aware index the timezone is dropped while keeping the index's own
    wall-clock time, rather than converting to UTC first. `nearest_slot`
    interprets naive user input in the index's timezone, so the day labels
    derived here must use the same clock; converting to UTC would list days
    that do not match the slots the user can actually select.
    A tz-naive index is only normalized.
    """
    if getattr(idx, "tz", None) is not None:
        # tz_localize(None) removes the tz but preserves local wall time.
        idx = idx.tz_localize(None)
    return idx.normalize()

def list_available_day_numbers() -> tuple[list[int], dict[int, pd.Timestamp]]:
    """
    Summarise which calendar days are covered by the module-level `surplus` index.

    Returns a pair:
      - the day-of-month numbers present, in ascending order (e.g., [23, 24, 25])
      - a dict {day_number -> full date (Timestamp at midnight)}.
    When the same day number occurs in more than one month, the earliest date wins.
    """
    distinct_days = pd.to_datetime(_normalized_naive_index(surplus.index).unique())
    first_date_for = {}
    # Walk the dates chronologically so the first hit per day number is the earliest.
    for date in pd.DatetimeIndex(sorted(distinct_days)):
        first_date_for.setdefault(date.day, date)
    return sorted(first_date_for), first_date_for

def get_valid_day_by_number() -> pd.Timestamp:
    """
    Prompt for a day-of-month number and translate it into a full date.

    Prints the day numbers offered by `list_available_day_numbers()`, reads one
    line from the user, and returns the date mapped to that number.
    Raises ValueError if the entry is not one or two digits, or if that day
    number is not among the available ones.
    """
    available, date_for_day = list_available_day_numbers()
    listing = ", ".join(map(str, available))
    print("Available days (day numbers only):", listing)

    answer = input("Enter day number (e.g., 23): ").strip()
    if re.fullmatch(r"\d{1,2}", answer) is None:
        raise ValueError("Please enter a numeric day like 23, 24, or 25.")

    chosen = int(answer)
    if chosen in date_for_day:
        return date_for_day[chosen]
    raise ValueError(f"Day {chosen} not available. Choose one of: {listing}.")

def parse_time_10min() -> pd.Timedelta:
    """
    Accepts: 12:10PM, 12;10 pm, 00:30, 23:50, etc. (';' or ':'; case-insensitive).
    Enforces 10-minute intervals. Returns a Timedelta since midnight.
    """
    raw = input("Enter the time (10-min steps, e.g., 12:10PM): ").strip().lower()
    raw = raw.replace(" ", "").replace(";", ":")
    ampm = None
    if raw.endswith("am") or raw.endswith("pm"):
        ampm = raw[-2:]
        raw = raw[:-2]
    m = re.fullmatch(r"(\d{1,2}):(\d{2})", raw)
    if not m:
        raise ValueError("Time must look like HH:MM or HH:MMAM/PM (e.g., 7:00PM).")
    hh, mm = int(m.group(1)), int(m.group(2))
    if ampm is not None:
        if not (1 <= hh <= 12): raise ValueError("For AM/PM, hour must be 1..12.")
        if ampm == "pm" and hh != 12: hh += 12
        if ampm == "am" and hh == 12: hh = 0
    else:
        if not (0 <= hh <= 23): raise ValueError("Hour must be 0..23.")
    if mm % 10 != 0:
        raise ValueError("Minutes must be a multiple of 10 (00,10,20,30,40,50).")
    return pd.Timedelta(hours=hh, minutes=mm)

def nearest_slot(ts_raw: pd.Timestamp, index: pd.DatetimeIndex) -> pd.Timestamp:
    """
    Return the entry of `index` closest in time to `ts_raw`.

    For a tz-naive index the lookup is done directly. For a tz-aware index a
    naive `ts_raw` is first localized to the index's timezone, then both sides
    are compared in UTC; the returned value is taken from the original index,
    so it keeps the index's timezone.
    """
    if getattr(index, "tz", None) is None:
        position = index.get_indexer([ts_raw], method="nearest")[0]
        return index[position]

    aware_ts = ts_raw.tz_localize(index.tz) if ts_raw.tzinfo is None else ts_raw
    # Compare on a common UTC clock, but answer with the caller's own index entry.
    position = index.tz_convert("UTC").get_indexer(
        [aware_ts.tz_convert("UTC")], method="nearest"
    )[0]
    return index[position]

def recommend_for(home: str, ts: pd.Timestamp,
                  surplus_df: pd.DataFrame,
                  can_sell_df: pd.DataFrame | None = None) -> dict:
    """
    At time ts:
      - Determine selected home's status (HIGH/LOW/NEUTRAL) from surplus sign.
      - If LOW: recommend the other home with the LARGEST surplus (seller).
      - If HIGH: recommend the other home with the LARGEST deficit (buyer).
      - Return a market snapshot listing all sellers and buyers at ts.
    """
    s_home = float(surplus_df.at[ts, home])

    row_all = surplus_df.loc[ts, :].astype(float)

    # Sellers (positive surplus); honor can_sell if provided
    sellers_mask = row_all > 0
    if can_sell_df is not None:
        sellers_mask = sellers_mask & can_sell_df.loc[ts, :].astype(bool)
    sellers = row_all[sellers_mask].sort_values(ascending=False)   # biggest surplus first

    # Buyers (negative surplus / deficit)
    buyers = row_all[row_all < 0].sort_values(ascending=True)      # most negative first

    best_seller = None
    best_seller_surplus = None
    best_buyer = None
    best_buyer_deficit = None

    if s_home < 0:
        cand = sellers.drop(labels=[home], errors="ignore")
        if not cand.empty:
            best_seller = cand.index[0]
            best_seller_surplus = float(cand.iloc[0])

    if s_home > 0:
        cand_b = buyers.drop(labels=[home], errors="ignore")
        if not cand_b.empty:
            best_buyer = cand_b.index[0]
            best_buyer_deficit = float(cand_b.iloc[0])  # negative

    status = "HIGH" if s_home > 0 else ("LOW" if s_home < 0 else "NEUTRAL")

    result = {
        "home": home,
        "timestamp": ts,
        "surplus_home_kWh": s_home,
        "status": status,
        "advice": None,
        "reason": None,
        "best_seller": best_seller,
        "best_seller_surplus_kWh": best_seller_surplus,
        "best_buyer": best_buyer,
        "best_buyer_deficit_kWh": best_buyer_deficit,  # negative
        "snapshot": {
            "sellers": sellers,  # pd.Series (+kWh)
            "buyers": buyers     # pd.Series (-kWh)
        }
    }

    # Advice text
    if status == "LOW":
        if best_seller is not None:
            result["advice"] = f"{home} is low ({s_home:.2f} kWh deficit). Buy from {best_seller}."
            result["reason"] = f"{best_seller} has the most excess at this slot: {best_seller_surplus:.2f} kWh."
        else:
            result["advice"] = f"{home} is low ({s_home:.2f} kWh deficit). No internal sellers now."
            result["reason"] = "No home with positive surplus at this slot; use battery or grid."
    elif status == "HIGH":
        if best_buyer is not None:
            result["advice"] = f"{home} has surplus ({s_home:.2f} kWh). Sell to {best_buyer}."
            result["reason"] = f"{best_buyer} needs the most energy now: {abs(best_buyer_deficit):.2f} kWh deficit."
        else:
            result["advice"] = f"{home} has surplus ({s_home:.2f} kWh). No buyers right now."
            result["reason"] = "No home with a deficit at this slot; consider charging or storing."
    else:
        result["advice"] = f"{home} is balanced (≈0 kWh). Hold / recheck next slot."
        result["reason"] = "Net surplus is ~0 kWh."

    return result


In [29]:
# === Cell C: Interactive decision ===

# Ask who is asking, then which slot (day number + clock time on the 10-minute grid).
print("What home are you? (e.g., h1..h5)")
home = parse_home(input())
day0 = get_valid_day_by_number()      # you enter ONLY the day number (e.g., 23)
offset = parse_time_10min()

# Combine day and time, then snap to a timestamp that actually exists in the data.
ts_raw = pd.Timestamp(day0 + offset)
ts = nearest_slot(ts_raw, surplus.index)

res = recommend_for(home, ts, surplus, can_sell)
status = res["status"]
s = float(res["surplus_home_kWh"])

print("\n--- Decision ---")
print(f"Slot selected: {ts} (nearest to your input {ts_raw})")
print(f"Home: {res['home']}  |  Status: {status}")
print(f"Your net: {s:+.2f} kWh")  # always signed

# Build the single counterparty line for this status, then print it once.
if status == "LOW":
    if res["best_seller"] is None:
        counterparty_line = "Recommended seller: —"
    else:
        counterparty_line = (
            f"Recommended seller: {res['best_seller']}  (+{res['best_seller_surplus_kWh']:.2f} kWh)"
        )
elif status == "HIGH":
    if res["best_buyer"] is None:
        counterparty_line = "Recommended buyer:  —"
    else:
        deficit = res["best_buyer_deficit_kWh"]
        need = 0.0 if deficit is None else abs(deficit)
        counterparty_line = f"Recommended buyer:  {res['best_buyer']}  (needs {need:.2f} kWh)"
else:
    counterparty_line = "Recommended counterparty: —"
print(counterparty_line)

print(f"\nAdvice: {res['advice']}")
print(f"Reason: {res['reason']}")

# Market snapshot: every seller and buyer at the chosen slot.
sellers = res["snapshot"]["sellers"]
buyers  = res["snapshot"]["buyers"]

for heading, book in (
    ("\n--- Sellers at this slot (descending by surplus) ---", sellers),
    ("\n--- Buyers at this slot (descending by deficit) ---", buyers),
):
    print(heading)
    if book.empty:
        print("None")
        continue
    for h, v in book.items():
        print(f"{h}: {v:+.2f} kWh")  # explicit sign; deficits print with a minus


What home are you? (e.g., h1..h5)
Available days (day numbers only): 23, 24, 25, 26

--- Decision ---
Slot selected: 2018-08-23 12:10:00+00:00 (nearest to your input 2018-08-23 12:10:00)
Home: h1  |  Status: LOW
Your net: -2.00 kWh
Recommended seller: —

Advice: h1 is low (-2.00 kWh deficit). No internal sellers now.
Reason: No home with positive surplus at this slot; use battery or grid.

--- Sellers at this slot (descending by surplus) ---
None

--- Buyers at this slot (descending by deficit) ---
h3: -3.68 kWh
h2: -2.88 kWh
h1: -2.00 kWh
h5: -0.98 kWh
h4: -0.48 kWh
