# 02 — Seller flags & windows
Input: `data/processed/surplus_kW_10min.csv` from Notebook 01  
Output:
- `reports/seller_flags_10min.csv` (True/False grid)
- `reports/seller_windows.csv` (long table [home, day, start, end, minutes])
- a readable schedule printed in the notebook (grouped by home and day, with long lines wrapped)


In [47]:
from pathlib import Path
import pandas as pd
import warnings
from itertools import islice

# Suppress UserWarnings whose message mentions `infer_datetime_format`.
warnings.filterwarnings(
    "ignore",
    message=".*infer_datetime_format.*",
    category=UserWarning,
)

# Project root: the first of cwd, its parent, or its grandparent that
# contains a `data/processed` directory entry.
CWD = Path.cwd()
ROOT = next(
    filter(
        lambda base: (base / "data" / "processed").exists(),
        (CWD, CWD.parent, CWD.parent.parent),
    ),
    None,
)
if ROOT is None:
    raise FileNotFoundError("Could not find 'data/processed'. Run 01 first.")

# Output folder for the reports written by this notebook (created if missing).
REPORTS = ROOT / "reports"
REPORTS.mkdir(parents=True, exist_ok=True)

# Input table: first CSV column is parsed as datetimes and becomes the sorted index.
DATA_PROCESSED = ROOT / "data" / "processed"
SUP_FILE = DATA_PROCESSED / "surplus_kW_10min.csv"
surplus = pd.read_csv(
    SUP_FILE,
    index_col=0,
    parse_dates=[0],
).sort_index()

# A cell is True wherever the surplus value is strictly positive.
seller_flags = surplus > 0
flags_path = REPORTS / "seller_flags_10min.csv"
seller_flags.to_csv(flags_path)

def mask_to_intervals(mask: pd.Series, freq: str = "10min"):
    """Turn a boolean Series into a list of ``(start, end)`` index pairs.

    Each pair covers one contiguous run of True values. ``start`` is the index
    label of the first True in the run; ``end`` is the label of the first
    False that follows it. A run that reaches the last element is closed at
    ``last label + pd.Timedelta(freq)``. An empty mask yields ``[]``.
    """
    if mask.empty:
        return []

    # Compare every element with its predecessor; the first element is
    # compared against False so that a leading True opens an interval.
    previous = mask.shift(1, fill_value=False)
    flipped = mask != previous

    rising = flipped & mask     # False -> True: an interval opens here
    falling = flipped & ~mask   # True -> False: an interval closes here

    opens = mask.index[rising]
    closes = mask.index[falling].tolist()

    # A run still open at the end has no closing label; synthesise one.
    if mask.iloc[-1]:
        closes.append(mask.index[-1] + pd.Timedelta(freq))

    return list(zip(opens, closes))

# Build the long table of seller windows: one record per contiguous run of
# True flags, computed separately for every home (column) and calendar day.
rows = []
for h in seller_flags.columns:
    # Grouping by the normalized (midnight) timestamp splits the column by day.
    for day, m in seller_flags[h].groupby(seller_flags.index.normalize()):
        for s, e in mask_to_intervals(m, freq="10min"):
            rows.append({"home": h, "day": day.date(), "start": s, "end": e})

if rows:
    seller_windows = (
        pd.DataFrame(rows)
        .assign(
            # Whole minutes between start and end of each window.
            minutes=lambda d: (
                (pd.to_datetime(d["end"]) - pd.to_datetime(d["start"]))
                .dt.total_seconds()
                .floordiv(60)
                .astype(int)
            )
        )
        .sort_values(["home", "day", "start"])
    )
else:
    # No seller intervals at all: keep the expected columns in an empty frame.
    seller_windows = pd.DataFrame(columns=["home", "day", "start", "end", "minutes"])

win_path = REPORTS / "seller_windows.csv"
seller_windows.to_csv(win_path, index=False)

print(f"Wrote: {flags_path.as_posix()} and {win_path.as_posix()}")


Wrote: e:/VPP/reports/seller_flags_10min.csv and e:/VPP/reports/seller_windows.csv


In [48]:
def chunked(iterable, n):
    """Yield consecutive lists of up to ``n`` items taken from ``iterable``.

    The final list may be shorter than ``n``; nothing is yielded for an
    exhausted or empty iterable.
    """
    source = iter(iterable)
    # Two-argument iter(): keep pulling slices until one comes back empty.
    yield from iter(lambda: list(islice(source, n)), [])

def fmt(s, e):
    """Format one interval as ``HH:MM–HH:MM (N min)``.

    Args:
        s: Interval start; anything ``pd.Timestamp`` accepts (Timestamp,
            datetime, ``numpy.datetime64``, or a date-time string).
        e: Interval end, same accepted types as ``s``.

    Returns:
        The label string, with ``N`` the whole number of minutes from start
        to end.
    """
    # Normalise both endpoints once and use them for the clock labels AND the
    # duration, so string / numpy inputs do not fail on the subtraction.
    start, end = pd.Timestamp(s), pd.Timestamp(e)
    minutes = int((end - start).total_seconds() // 60)
    return f"{start.strftime('%H:%M')}–{end.strftime('%H:%M')} ({minutes} min)"

def print_schedule_readable(df: pd.DataFrame, max_per_line=4):
    """Print the interval table grouped by home and then by day.

    Reads the ``home``, ``day``, ``start`` and ``end`` columns of ``df``.
    Each day's intervals are formatted with ``fmt`` and printed at most
    ``max_per_line`` per output line, separated by ``|``. An empty frame
    prints a single notice instead.
    """
    if df.empty:
        print("No seller intervals found.")
        return

    for home, home_rows in df.groupby("home"):
        print(f"\n{home}:")
        for day, day_rows in home_rows.groupby("day"):
            # Format every interval first, then emit the day header and lines.
            labels = [
                fmt(start, end)
                for start, end in zip(day_rows["start"], day_rows["end"])
            ]
            print(f"  {day}:")
            for line_items in chunked(labels, max_per_line):
                print("    " + "   |   ".join(line_items))

print("Per-home seller availability (10-min slots):")
# Per column: number of True slots, total number of rows, and their ratio
# rounded to three decimals.
display(
    seller_flags.sum()
    .to_frame("seller_slots")
    .assign(
        total_slots=len(seller_flags),
        # Later keyword arguments of assign() may use columns created earlier.
        share=lambda t: t["seller_slots"].div(t["total_slots"]).round(3),
    )
)
print("\nReadable schedule (per day):")
print_schedule_readable(seller_windows, max_per_line=4)


Per-home seller availability (10-min slots):


Unnamed: 0,seller_slots,total_slots,share
h1,38,433,0.088
h2,38,433,0.088
h3,85,433,0.196
h4,140,433,0.323
h5,85,433,0.196



Readable schedule (per day):

h1:
  2018-08-23:
    16:00–16:10 (10 min)   |   16:30–16:40 (10 min)   |   17:10–17:30 (20 min)   |   18:00–18:20 (20 min)
    19:20–20:00 (40 min)   |   20:30–21:10 (40 min)   |   21:40–22:30 (50 min)
  2018-08-24:
    16:40–16:50 (10 min)   |   17:20–17:30 (10 min)   |   18:10–18:30 (20 min)   |   18:50–19:00 (10 min)
    20:30–21:20 (50 min)   |   21:40–22:00 (20 min)
  2018-08-25:
    17:10–17:20 (10 min)   |   21:00–21:40 (40 min)   |   21:50–22:10 (20 min)

h2:
  2018-08-23:
    13:30–13:40 (10 min)   |   13:50–14:00 (10 min)   |   14:20–14:30 (10 min)   |   14:40–14:50 (10 min)
    15:00–15:10 (10 min)   |   15:20–15:30 (10 min)   |   15:40–15:50 (10 min)   |   16:30–16:40 (10 min)
    16:50–17:00 (10 min)   |   17:20–17:30 (10 min)   |   17:50–18:00 (10 min)   |   18:20–18:30 (10 min)
    18:50–19:00 (10 min)   |   19:10–19:30 (20 min)   |   19:50–20:00 (10 min)   |   20:30–20:40 (10 min)
    21:10–21:30 (20 min)   |   22:20–22:30 (10 min)   |   