In [None]:
# Cell 0

import re, math, json, itertools, collections
from pathlib import Path
from dataclasses import dataclass, field, asdict

from openpyxl.utils.cell import (
    coordinate_from_string,
    column_index_from_string,
    get_column_letter,
)

import openpyxl
import networkx as nx

In [None]:
# Cell 1

# Point to your workbook (path is relative to the notebook's working directory)
WB_PATH = Path("../data/LIAS_Senegal.xlsx")

# Load with formulas preserved: data_only=False keeps the formula text in
# cell.value, which the reference parsing further down relies on.
wb = openpyxl.load_workbook(WB_PATH, data_only=False, read_only=False)
# Quick sanity check that the expected sheets were found
print("Sheets:", wb.sheetnames)

In [None]:
# Cell 2
# Index sheets by title for direct lookup
SHEETS = {ws.title: ws for ws in wb.worksheets}

# Index Excel Tables (structured references).
# openpyxl's ``ws.tables.items()`` yields (name, ref-string) pairs rather than
# Table objects, so iterate ``values()`` to keep the Table itself; code that
# resolves structured references needs its ``tableColumns`` and ``ref``.
TABLE_INDEX = {}  # name -> (sheet_name, table_obj)
for ws in wb.worksheets:
    for tbl in ws.tables.values():
        TABLE_INDEX[tbl.name] = (ws.title, tbl)

def a1_to_rc(a1):
    """Convert an A1-style address such as 'E42' into a 1-based (row, col) pair."""
    letters, row_no = coordinate_from_string(a1)
    col_no = column_index_from_string(letters)
    return int(row_no), col_no

def col_letter(idx: int) -> str:
    """Return the column letter(s) for a 1-based column index (thin wrapper
    around openpyxl's ``get_column_letter``)."""
    return get_column_letter(idx)

def rng_to_cells(sheet_name, a1_range):
    """Expand a range such as 'A2:C5' into a row-major list of (sheet, 'A1') tuples.

    Raises KeyError when ``sheet_name`` is not a known sheet.
    """
    _ = SHEETS[sheet_name]  # lookup only; rejects unknown sheet names
    first_col, first_row, last_col, last_row = openpyxl.utils.range_boundaries(a1_range)
    return [
        (sheet_name, f"{col_letter(col_no)}{row_no}")
        for row_no in range(first_row, last_row + 1)
        for col_no in range(first_col, last_col + 1)
    ]

def get_cell(ws, addr):
    """Return whatever ``ws[addr]`` yields (for a single A1 address, the cell)."""
    return ws[addr]

def is_text(val):
    """True only for strings that contain at least one non-whitespace character."""
    if not isinstance(val, str):
        return False
    return len(val.strip()) > 0

def is_all_caps(s):
    """True when ``s`` has at least one letter and its letters are upper-case.

    Digits, spaces and punctuation are ignored.
    """
    alpha_chars = [ch for ch in s if ch.isalpha()]
    if not alpha_chars:
        return False
    return "".join(alpha_chars).isupper()

def clean(s):
    """Stringify ``s``, collapse every whitespace run to one space, and trim the ends."""
    text = str(s)
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()


In [None]:
# Cell 3

def nearest_headers(ws, cell_addr, max_gap=3):
    """Collect the text cells stacked above and running to the left of a cell.

    Each scan walks away from the cell and stops once more than ``max_gap``
    consecutive non-text cells have been seen (or the sheet edge is reached).

    Returns:
        (above, left): two lists of cleaned header strings, each ordered from
        the FARTHEST header to the NEAREST one, so ``[-1]`` is the header
        closest to the cell.
    """
    row0, col0 = a1_to_rc(cell_addr)

    def _walk(read_value, start):
        # Step from `start` down to index 1, tolerating short blank gaps.
        found = []
        blanks = 0
        pos = start
        while pos >= 1 and blanks <= max_gap:
            value = read_value(pos)
            if is_text(value):
                found.append(clean(value))
                blanks = 0
            else:
                blanks += 1
            pos -= 1
        found.reverse()  # collected nearest-first; hand back farthest-first
        return found

    above = _walk(lambda rr: ws.cell(rr, col0).value, row0 - 1)
    left = _walk(lambda cc: ws.cell(row0, cc).value, col0 - 1)
    return above, left

def nearest_region_title(ws, cell_addr, search_rows=25):
    """Look up the cell's column for the closest title-like text.

    A text cell counts as a title when its font is bold, its letters are all
    upper-case, or its cleaned text is longer than 40 characters. At most
    ``search_rows`` rows above the cell are inspected.

    Returns the cleaned title text, or None when nothing qualifies.
    """
    row0, col0 = a1_to_rc(cell_addr)
    top_row = max(1, row0 - search_rows)
    for rr in range(row0 - 1, top_row - 1, -1):
        candidate = ws.cell(rr, col0)
        if not is_text(candidate.value):
            continue
        title = clean(candidate.value)
        is_bold = bool(candidate.font and candidate.font.bold)
        if is_bold or is_all_caps(title) or len(title) > 40:
            return title
    return None

def guess_units_from_headers(headers):
    """Guess unit hints from header text.

    Only string headers that do not look like formulas (leading '=') are
    considered. Alphabetic tokens ("per", "usd", "month", ...) must appear as
    whole words, so "percent" or "operational" no longer report "per";
    symbol tokens ("%", "$", "/mo") are matched as plain substrings.

    Args:
        headers: iterable of header values; non-strings are ignored.

    Returns:
        Comma-separated, sorted unit tokens, or None when nothing matched.
    """
    toks = ["%", "percent", "usd", "$", "eur", "per", "/mo", "month", "months", "people", "persons"]
    words = " ".join(h for h in headers if isinstance(h, str) and not h.strip().startswith("=")).lower()
    hits = set()
    for tok in toks:
        if tok.isalpha():
            # [^\W\d_] is "a letter"; require no letter on either side of the token.
            if re.search(rf"(?<![^\W\d_]){re.escape(tok)}(?![^\W\d_])", words):
                hits.add(tok)
        elif tok in words:
            hits.add(tok)
    return ", ".join(sorted(hits)) if hits else None


In [None]:
# Cell 4
# Substrings of a (lower-cased) sheet name that mark it as an inputs sheet
PARAM_SHEET_HINTS = {"param", "assumption", "input"}

def tag_role(sheet_name, cell_obj, referenced_by_count=0):
    """Classify a cell as 'derived', 'parameter', 'control' or 'constant'.

    - 'derived':   the value is a string starting with '='.
    - 'parameter': a non-formula on a sheet whose name contains one of
                   PARAM_SHEET_HINTS, or referenced at least 10 times.
    - 'control':   a non-formula convertible to an integer in [-10, 10].
    - 'constant':  everything else (including values float() rejects).
    """
    value = cell_obj.value
    lowered_name = sheet_name.lower()
    on_param_sheet = any(hint in lowered_name for hint in PARAM_SHEET_HINTS)
    holds_formula = isinstance(value, str) and value.startswith("=")

    if holds_formula:
        return "derived"
    if on_param_sheet or referenced_by_count >= 10:
        return "parameter"

    # small integers are often controls
    try:
        as_number = float(value)
    except Exception:
        return "constant"
    if as_number.is_integer() and -10 <= as_number <= 10:
        return "control"
    return "constant"


In [None]:
# Cell 5
# Regex for A1 refs, optionally sheet-qualified: Sheet!$A$1 or 'Weird Name'!B3
# Groups: (1) quoted sheet name, (2) unquoted sheet name, (3) column letters,
# (4) row digits. '$' anchors are accepted but not captured.
# The pattern has no word boundaries, so on its own it can also match inside
# longer tokens; each range endpoint is matched as a separate reference.
A1_REF = re.compile(
    r"(?:(?:'([^']+)'|([A-Za-z0-9_ ]+))!)?\$?([A-Z]{1,3})\$?(\d+)",
    re.IGNORECASE,
)
# Basic SUMIFS/XLOOKUP/INDEX/MATCH recognition: each pattern matches the
# function name followed by its opening parenthesis, case-insensitively.
RE_SUMIFS = re.compile(r"SUMIFS\s*\(", re.I)
RE_XLOOK  = re.compile(r"XLOOKUP\s*\(", re.I)
RE_INDEX  = re.compile(r"INDEX\s*\(", re.I)
RE_MATCH  = re.compile(r"MATCH\s*\(", re.I)
RE_SUMPROD= re.compile(r"SUMPRODUCT\s*\(", re.I)
# Structured ref like Table1[Amount] or [@Amount]
# Groups: (1) optional table name, (2) bracket content with a leading '@'
# and surrounding whitespace already removed by the pattern.
RE_STRUCT = re.compile(r"(?:(\w+))?\s*\[\s*@?\s*([^\]]+?)\s*\]")

def parse_structured_refs(formula):
    """List the structured references found in ``formula``.

    Returns (table, column) pairs in order of appearance; ``table`` is None
    when no table name precedes the bracket (e.g. ``[@Amount]``). RE_STRUCT
    already consumes one leading '@', so the column comes back without it.
    """
    return [
        (hit.group(1), hit.group(2).strip())
        for hit in RE_STRUCT.finditer(formula)
    ]

def structured_to_ranges(sheet_name, tbl_name, col_label):
    """Map ``TableName[col]`` to the data cells of that table column (approximate).

    Args:
        sheet_name: sheet of the referencing formula (not used for the
            lookup; the table's own sheet comes from TABLE_INDEX).
        tbl_name: key into TABLE_INDEX.
        col_label: column name as listed in the table's ``tableColumns``.

    Returns:
        List of (sheet, 'A1') tuples for the column's data rows, excluding
        header and totals rows. Empty when the table or column is unknown.
    """
    if tbl_name not in TABLE_INDEX:
        return []
    t_sheet, tbl = TABLE_INDEX[tbl_name]
    # Position of the column inside the table (0-based)
    columns = [tc.name for tc in tbl.tableColumns]
    if col_label not in columns:
        return []
    min_col, min_row, max_col, max_row = openpyxl.utils.range_boundaries(tbl.ref)
    col_idx = min_col + columns.index(col_label)

    # tbl.ref spans header and totals rows too. Tables may have no header
    # row (headerRowCount == 0) and may carry a totals row; a missing header
    # count is treated as one row.
    header_rows = getattr(tbl, "headerRowCount", 1)
    header_rows = 1 if header_rows is None else int(header_rows)
    totals_rows = int(getattr(tbl, "totalsRowCount", 0) or 0)

    letter = col_letter(col_idx)
    first_data_row = min_row + header_rows
    last_data_row = max_row - totals_rows
    return [(t_sheet, f"{letter}{r}") for r in range(first_data_row, last_data_row + 1)]


def extract_a1_refs(default_sheet_name: str, formula: str):
    """Return a list of (sheet, addr) for the A1 references in ``formula``.

    - A reference without an explicit sheet belongs to ``default_sheet_name``,
      except the end of a range (``Sheet2!A1:B5``), which inherits the sheet
      of the range start.
    - Tokens that are really part of a function name or longer identifier
      (``LOG10(``, ``ATAN2(``, ``Rate2024``, ``1E5``) are skipped.
    - Ranges are NOT expanded: only their two endpoints are returned.

    Addresses are upper-cased and stripped of '$' anchors.
    """
    out = []
    prev_end = -1       # end offset of the previously accepted match
    prev_sheet = None   # sheet assigned to the previously accepted match
    for m in A1_REF.finditer(formula):
        start, end = m.span()
        before = formula[start - 1] if start > 0 else ""
        after = formula[end:end + 1]
        # A real cell reference is never glued to identifier characters and
        # is never directly followed by '(' (that would be a function call).
        if before.isalnum() or before == "_":
            continue
        if after == "(" or after == "_" or after.isalnum():
            continue
        sheet_quoted, sheet_unquoted, col, row = m.groups()
        explicit_sheet = sheet_quoted or sheet_unquoted
        if explicit_sheet:
            sheet = explicit_sheet.strip()
        elif before == ":" and prev_end == start - 1 and prev_sheet is not None:
            # Second endpoint of a range: same sheet as the first endpoint.
            sheet = prev_sheet
        else:
            sheet = default_sheet_name.strip()
        out.append((sheet, f"{col.upper()}{row}"))
        prev_end, prev_sheet = end, sheet
    return out

def extract_all_refs(sheet_name, formula):
    """Collect every dependency of ``formula`` as (sheet, addr) tuples.

    Combines plain A1 references with the expansion of table-qualified
    structured references. References without a table name are skipped.
    Duplicates are removed while keeping first-seen order. Anything that is
    empty or does not start with '=' yields an empty list.
    """
    if not formula or not formula.startswith("="):
        return []

    found = list(extract_a1_refs(sheet_name, formula))

    # Structured refs (approximate expansion)
    for table_name, column in parse_structured_refs(formula):
        if column.startswith("@"):
            column = column.lstrip("@").strip()
        if table_name is not None:
            found += structured_to_ranges(sheet_name, table_name, column)

    # Order-preserving de-duplication
    unique, already = [], set()
    for ref in found:
        if ref in already:
            continue
        already.add(ref)
        unique.append(ref)
    return unique


In [None]:
# Cell 6
# Dependency graph over cells. Nodes are (sheet, 'A1') tuples and an edge
# u -> v means "u is referenced by the formula in v" (u feeds v).
G = nx.DiGraph()

for sname, ws in SHEETS.items():
    for cell in itertools.chain.from_iterable(ws.iter_rows()):
        node = (sname, cell.coordinate)
        G.add_node(node)
        content = cell.value
        if not (isinstance(content, str) and content.startswith("=")):
            continue
        # One incoming edge per distinct reference in the formula
        G.add_edges_from((dep, node) for dep in extract_all_refs(sname, content))

# Fan-out per cell (how many formulas reference it), used for role tagging
ref_by = collections.Counter(src for src, _dst in G.edges())


In [None]:
# Cell 7
@dataclass
class CellMeta:
    """Descriptive metadata for one workbook cell, as produced by annotate_node."""
    # Sheet title and A1 address identifying the cell
    sheet: str
    addr: str
    # Cell value when it is a string starting with '=', otherwise None
    formula: str | None = None
    # Cell value when it is not a formula, otherwise None
    constant: str | float | None = None
    # Header texts found above / to the left of the cell (see nearest_headers)
    headers_above: list[str] = field(default_factory=list)
    headers_left: list[str]   = field(default_factory=list)
    # Closest title-like text above the cell (see nearest_region_title)
    region_title: str | None  = None
    # Unit hints guessed from the headers (see guess_units_from_headers)
    units: str | None         = None
    # Role label assigned by tag_role
    role: str | None          = None
    # Fan-out count taken from ref_by
    referenced_by: int        = 0

def annotate_node(sheet, addr):
    """Build the CellMeta for the cell at ``sheet``!``addr``.

    Reads the cell from SHEETS, separates formula from constant, and attaches
    surrounding headers, region title, guessed units, role and fan-out.
    Raises KeyError when ``sheet`` is not in SHEETS.
    """
    ws = SHEETS[sheet]
    cell = ws[addr]
    raw = cell.value
    holds_formula = isinstance(raw, str) and raw.startswith("=")

    above, left = nearest_headers(ws, addr)
    region = nearest_region_title(ws, addr)
    units = guess_units_from_headers(above + left)
    fan_out = ref_by[(sheet, addr)]
    role = tag_role(sheet, cell, referenced_by_count=fan_out)

    return CellMeta(
        sheet=sheet,
        addr=addr,
        formula=raw if holds_formula else None,
        constant=None if holds_formula else raw,
        headers_above=above,
        headers_left=left,
        region_title=region,
        units=units,
        role=role,
        referenced_by=fan_out,
    )

def backward_slice(target, max_nodes=4000):
    """Collect everything upstream of ``target`` in the dependency graph G.

    Args:
        target: (sheet, addr) node to slice from.
        max_nodes: soft cap; traversal stops expanding once this many nodes
            have been collected (the last expansion may overshoot it).

    Returns:
        (order, SG): ``order`` lists the slice nodes with inputs before the
        cells that use them and the target included; ``SG`` is a copy of the
        induced subgraph.
    """
    # Walk predecessor edges to collect upstream nodes
    seen = {target}
    stack = [target]
    while stack and len(seen) < max_nodes:
        cur = stack.pop()
        for p in G.predecessors(cur):
            if p not in seen:
                seen.add(p)
                stack.append(p)

    SG = G.subgraph(seen).copy()
    try:
        order = list(nx.topological_sort(SG))
    except nx.NetworkXUnfeasible:
        # Circular references: no topological order exists. Fall back to a
        # DFS post-order over the REVERSED graph starting at the target, so
        # the walk follows "depends on" edges, visits the whole upstream
        # slice, and still lists inputs before their dependents.
        order = list(nx.dfs_postorder_nodes(SG.reverse(copy=False), target))
    return order, SG


In [None]:
# Cell 8
@dataclass
class IRNode:
    """One recognised computation step for a formula cell (built by recognize_motif)."""
    # Motif label, e.g. "SUM_WHERE", "LOOKUP", "AGG", "CASE" or "ARITH"
    kind: str
    target: tuple  # (sheet, addr)
    expr: dict     # kind-specific payload
    # Metadata of the cell this step was derived from
    meta: CellMeta

def recognize_motif(meta: CellMeta) -> IRNode | None:
    """Classify a formula cell into a coarse motif and wrap it as an IRNode.

    Checked in order: SUMIFS, XLOOKUP, INDEX+MATCH, SUMPRODUCT, a leading
    IF(, then a generic arithmetic fallback. Returns None for cells without
    a formula.

    For SUMIFS/XLOOKUP the argument list is taken from the parenthesis that
    belongs to that function call, so wrapped or post-processed calls such
    as ``=ROUND(SUMIFS(...),2)`` or ``=SUMIFS(...)*2`` are parsed correctly.
    """
    f = meta.formula
    if not f:
        return None
    target = (meta.sheet, meta.addr)

    def _args_of(match):
        # `match` ends just after the call's opening '('; scan to its
        # balancing ')' while ignoring parentheses inside string literals.
        start = match.end()
        depth, in_string = 1, False
        for i in range(start, len(f)):
            ch = f[i]
            if ch == '"':
                in_string = not in_string
            elif in_string:
                continue
            elif ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth == 0:
                    return split_top_commas(f[start:i])
        # Unbalanced formula: best effort on whatever follows the '('
        return split_top_commas(f[start:])

    # SUMIFS(range, c1_range, k1, c2_range, k2, ...)
    m = RE_SUMIFS.search(f)
    if m:
        return IRNode("SUM_WHERE", target, {"op": "SUM_WHERE", "args": _args_of(m)}, meta)
    # XLOOKUP(key, keys, vals, ...)
    m = RE_XLOOK.search(f)
    if m:
        return IRNode("LOOKUP", target, {"op": "LOOKUP", "args": _args_of(m)}, meta)
    # INDEX(MATCH())
    if RE_INDEX.search(f) and RE_MATCH.search(f):
        return IRNode("LOOKUP", target, {"op": "LOOKUP_INDEX_MATCH", "raw": f}, meta)
    if RE_SUMPROD.search(f):
        return IRNode("AGG", target, {"op": "SUMPRODUCT", "raw": f}, meta)
    if f.upper().startswith("=IF("):
        return IRNode("CASE", target, {"op": "CASE", "raw": f}, meta)
    # Fallback arithmetic
    return IRNode("ARITH", target, {"op": "ARITH", "raw": f}, meta)


def split_top_commas(s):
    """Split an argument list on its top-level commas.

    A comma is top-level when it is outside parentheses, brackets
    (structured references), braces (array constants), double-quoted string
    literals and single-quoted sheet names.

    Returns the trimmed pieces. An empty input gives [], and a trailing
    empty piece after a final comma is dropped.
    """
    out, buf = [], []
    depth = 0
    quote = None  # the quote character we are inside of, if any
    for ch in s:
        if quote is not None:
            # Inside a literal everything is data; the same quote closes it.
            # Doubled quotes ("" or '') simply close and reopen.
            if ch == quote:
                quote = None
            buf.append(ch)
            continue
        if ch == '"' or ch == "'":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
        if ch == "," and depth == 0:
            out.append("".join(buf).strip())
            buf = []
        else:
            buf.append(ch)
    if buf:
        out.append("".join(buf).strip())
    return out


In [None]:
# Cell 9
def _usable_header(h: str) -> bool:
    if not h or not isinstance(h, str): return False
    s = h.strip()
    if s.startswith("="): return False            # ignore formulas
    if len(s) <= 1 and s in {"■", "█"}: return False  # ignore glyph-only
    # ignore super-cryptic headers packed with punctuation
    if sum(ch in s for ch in ":,()[]=<>$") > 4: return False
    return True

def pretty_name(meta: CellMeta):
    """Compose a short display label for a cell.

    Joins, with " • ", the last usable left header, the last usable header
    above, and the region title up to its first comma. Falls back to
    "Sheet!Addr" when none is usable. Whitespace is collapsed and the result
    is capped at 100 characters.
    """
    def _last_usable(candidates):
        return next((h for h in reversed(candidates) if _usable_header(h)), None)

    pieces = []
    for header in (_last_usable(meta.headers_left), _last_usable(meta.headers_above)):
        if header is not None:
            pieces.append(header)
    title = meta.region_title
    if title and _usable_header(title):
        pieces.append(title.split(",")[0])

    label = " • ".join(pieces).strip() or f"{meta.sheet}!{meta.addr}"
    collapsed = re.sub(r"\s+", " ", label)
    return collapsed[:100]

# Formula whose outermost call is IF(...)
IF_TOP = re.compile(r"^=IF\(", re.I)

def parse_if_chain(formula: str) -> list[tuple[str, str]]:
    """Flatten ``=IF(c1, r1, IF(c2, r2, else))`` into (condition, result) pairs.

    Returns one (condition, then-expression) pair per IF in the chain,
    followed by ("ELSE", expr) when a final else-expression exists. Returns
    [] when the formula does not start with ``=IF(``. Missing arguments are
    reported as "?".
    """
    s = formula.strip()
    if not IF_TOP.match(s):
        return []
    out = []
    rest = s[1:]  # drop the leading '='
    # Peel successive IFs out of the else-branch
    while rest.upper().startswith("IF("):
        # Skip the three characters "IF(" so the helper starts INSIDE the
        # call; leaving the '(' in would keep its depth counter from ever
        # reaching zero.
        inner, tail = _unwrap_parens(rest[3:])
        if tail and out:
            # The nested IF is only part of a larger else-expression
            # (e.g. IF(...)+1); keep that expression whole as the final else.
            break
        parts = split_top_commas(inner)
        cond = parts[0] if parts else "?"
        then = parts[1] if len(parts) > 1 else "?"
        elsep = parts[2] if len(parts) > 2 else ""
        out.append((cond, then))
        rest = elsep.strip()
    # Final else
    if rest:
        out.append(("ELSE", rest))
    return out

def _unwrap_parens(s: str) -> tuple[str, str]:
    """Return (inside, rest_after_closing_paren) for a string starting with content of IF(...)."""
    depth = 1
    buf = []
    i = 0
    for i, ch in enumerate(s):
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                break
        buf.append(ch)
    inside = "".join(buf)
    rest = s[i+1:].lstrip()
    return inside, rest

def is_visual_marker_case(parts: list[tuple[str,str]]) -> bool:
    """True when at least one branch result is a marker glyph literal or the
    empty-string literal (compared after trimming whitespace)."""
    glyphs = {"\"██\"", "\"■\"", "\"\"", "\"●\"", "\"○\""}
    for _cond, result in parts:
        if result.strip() in glyphs:
            return True
    return False

def explain_case(meta: CellMeta) -> str:
    """Render an IF ladder as a single readable line.

    - No parsable IF chain: ``<name> := <formula without '=' | constant>``.
    - Branches that return marker glyphs: a "(visual marker)" line with the
      glyphs spelled out.
    - Otherwise: ``<name> := CASE { when c → r; ...; else → r }``.

    Every (condition, result) pair is rendered, including the last one when
    the IF has no else-branch.
    """
    parts = parse_if_chain(meta.formula or "")
    if not parts:
        return f"{pretty_name(meta)} := {meta.formula[1:] if meta.formula else meta.constant}"

    # Special friendly text for glyphs
    if is_visual_marker_case(parts):
        glyph_names = {"\"██\"": "full block", "\"■\"": "square", "\"\"": "empty"}
        lines = []
        for cond, res in parts:
            pretty = glyph_names.get(res.strip(), res)
            if cond == "ELSE":
                # Only call the fallback "(empty)" when it really is the empty string
                lines.append("otherwise → (empty)" if pretty == "empty" else f"otherwise → {pretty}")
            else:
                lines.append(f"{cond} → {pretty}")
        case = "; ".join(lines)
        return f"{pretty_name(meta)} (visual marker) := {case}"

    # Build a compact CASE-like line
    lines = [
        f"else → {res}" if cond == "ELSE" else f"when {cond} → {res}"
        for cond, res in parts
    ]
    case = "; ".join(lines)
    return f"{pretty_name(meta)} := CASE {{ {case} }}"


def emit_step(ir: IRNode):
    """Turn one IR node into a one-line, human-readable step description.

    CASE nodes are delegated to explain_case; all other kinds are rendered
    as ``<pretty name> := <summary>``.
    """
    m = ir.meta
    if ir.kind == "CASE":
        return explain_case(m)
    label = pretty_name(m)
    if ir.kind == "SUM_WHERE":
        args = ir.expr["args"]
        # best-effort: first is sum_range; pairs thereafter are cond_range, key
        sum_range = args[0] if args else "range"
        conds = []
        for i in range(1, len(args), 2):
            key = args[i + 1] if i + 1 < len(args) else "?"
            conds.append(f"{args[i]} = {key}")
        cond_txt = " AND ".join(conds) if conds else "/* no conditions parsed */"
        return f"{label} := SUM FROM {sum_range} WHERE {cond_txt}"
    if ir.kind == "LOOKUP":
        formula = m.formula or ""
        # Show the ellipsis only when the formula was actually cut
        shown = formula[:80] + ("…" if len(formula) > 80 else "")
        return f"{label} := LOOKUP(/* parsed */)  # {shown}"
    if ir.kind == "AGG":
        return f"{label} := AGG(/* SUMPRODUCT */)"
    if ir.kind == "ARITH":
        return f"{label} := {m.formula[1:]}"
    return f"{label} := /* unknown */"

def render_narrative(target_meta: CellMeta, steps:list[str], inputs:list[CellMeta]):
    """Assemble the plain-text summary for a target cell.

    The text has a title (with units if known), a sentence locating the
    cell, the step count, up to six key inputs, and a bulleted outline of at
    most twelve steps, followed by an ellipsis bullet when more exist.
    """
    heading = f"{pretty_name(target_meta)}"
    unit_note = f" (units: {target_meta.units})" if target_meta.units else ""
    key_inputs = "; ".join(
        f"{pretty_name(item)} [{item.sheet}!{item.addr}]" for item in inputs[:6]
    )
    outline = "\n  - ".join(steps[:12])
    overflow = "\n  - …" if len(steps) > 12 else ""

    pieces = [
        f"{heading}{unit_note}\n\n",
        f"This output is computed in {target_meta.sheet}!{target_meta.addr}. ",
        f"It is derived from {len(steps)} step(s). ",
        f"Key inputs/controls: {key_inputs or 'n/a'}.\n\n",
        f"Computation outline:\n  - ",
        outline,
        overflow,
    ]
    return "".join(pieces)


In [None]:
import collections as _collections

def is_alias_formula(f: str) -> tuple[bool, tuple[str,str] | None]:
    # match =Sheet!A1 or =A1
    m = re.fullmatch(r"=\s*(?:'([^']+)'\!|([A-Za-z0-9_ ]+)\!)?\$?([A-Z]{1,3})\$?(\d+)\s*", f or "", flags=re.I)
    if not m: return False, None
    sheet = (m.group(1) or m.group(2))
    col, row = m.group(3), m.group(4)
    return True, (sheet.strip() if sheet else None, f"{col}{row}")

def collapse_aliases(ir_list: list[IRNode]) -> list[IRNode]:
    """Drop IR nodes whose formula is only a pointer to another cell.

    A node is removed when its formula is a pure alias and following the
    alias chain ends somewhere other than the node itself. All other nodes
    are returned in their original order.
    """
    # cell -> the cell it points at
    alias_of = {}
    for node in ir_list:
        formula = node.meta.formula
        if not formula:
            continue
        matched, ref = is_alias_formula(formula)
        if not matched:
            continue
        ref_sheet, ref_addr = ref
        if ref_sheet is None:
            ref_sheet = node.meta.sheet  # same-sheet ref
        alias_of[(node.meta.sheet, node.meta.addr)] = (ref_sheet, ref_addr)

    def _resolve(key):
        # Follow the chain until it leaves the alias map or starts to loop.
        visited = set()
        while key in alias_of and key not in visited:
            visited.add(key)
            key = alias_of[key]
        return key

    def _is_pure_alias(node):
        key = (node.meta.sheet, node.meta.addr)
        return key in alias_of and _resolve(key) != key

    return [node for node in ir_list if not _is_pure_alias(node)]

def group_repeated_products(ir_list: list[IRNode]) -> list[str]:
    """Emit step lines, folding one repeated product pattern into a summary.

    Formulas of the form ``=$I<row>*S!$P4*<COL>114`` are not emitted one by
    one; they are summarised in a single trailing line listing the distinct
    column letters (at most twelve shown). Every other node is rendered with
    emit_step, in input order.
    """
    pat = re.compile(r"^\=\s*\$?I\d+\s*\*\s*S\!\$?P4\s*\*\s*([A-Z]{1,3})114\s*$", re.I)
    pattern_label = "$I{row} * S!$P4 * {COL}114"

    matched, remaining = [], []
    for node in ir_list:
        hit = pat.match(node.meta.formula) if node.meta.formula else None
        if hit is None:
            remaining.append(node)
        else:
            matched.append((node.meta.sheet, node.meta.addr, hit.group(1)))

    lines = [emit_step(node) for node in remaining]
    if matched:
        cols = sorted({col for _sheet, _addr, col in matched})
        first_sheet, first_addr, _first_col = matched[0]
        sample = f"{first_sheet}!{first_addr}"
        shown = ", ".join(cols[:12])
        more = "…" if len(cols) > 12 else ""
        lines.append(
            f"{sample} := repeated product pattern `{pattern_label}` across {len(cols)} columns ({shown}{more})"
        )
    return lines


In [None]:
# Cell 10
# Choose a target cell
TARGET = ("O", "W18")

order, SG = backward_slice(TARGET)

# Drop references to sheets that are not part of this workbook
order_clean = [(s, a) for (s, a) in order if s in SHEETS]
annotated = [annotate_node(s, a) for (s, a) in order_clean]

# Build IR, skip pure constants unless target
IR = [recognize_motif(m) for m in annotated if (m.formula or (m.sheet, m.addr) == TARGET)]
IR = [x for x in IR if x]
IR = collapse_aliases(IR)

# Emit steps with grouping for repeated products
steps = group_repeated_products(IR)

# Trim to something readable
MAX_STEPS = 40
steps = steps[:MAX_STEPS]

slice_nodes = set(order_clean)
# Reuse the annotations computed above instead of annotating every node a
# second time. Iterating the ordered list (not the set) together with the
# stable sort keeps the order of equally-referenced inputs reproducible
# across kernel restarts.
inputs = [m for m in annotated if m.role in {"parameter", "control"}]
inputs_sorted = sorted(inputs, key=lambda m: m.referenced_by, reverse=True)

tmeta = annotate_node(*TARGET)
narrative = render_narrative(tmeta, steps, inputs_sorted)
print(narrative)


In [None]:
def collect_nearby_text(ws, addr, rows_up=30, rows_down=3):
    """Gather distinct text snippets from the cell's column around ``addr``.

    Scans from ``rows_up`` rows above to ``rows_down`` rows below (clamped
    to the sheet), keeps cleaned strings of at most 300 characters, removes
    duplicates while preserving top-to-bottom order, and returns the first
    ten.
    """
    row0, col0 = a1_to_rc(addr)
    first_row = max(1, row0 - rows_up)
    last_row = min(ws.max_row, row0 + rows_down)

    unique = {}  # dict keys keep first-seen order
    for rr in range(first_row, last_row + 1):
        raw = ws.cell(rr, col0).value
        if not (isinstance(raw, str) and raw.strip()):
            continue
        text = clean(raw)
        if len(text) <= 300:
            unique.setdefault(text[:300], None)
    return list(unique)[:10]

def extract_case_mapping(meta: CellMeta):
    """Describe an IF ladder as a list of {"when": ..., "then": ...} dicts.

    Returns None when the cell has no formula, the formula does not start
    with ``=IF(``, or the chain cannot be parsed. Known glyph results are
    replaced by a descriptive word.
    """
    formula = meta.formula
    if not formula or not formula.upper().startswith("=IF("):
        return None
    branches = parse_if_chain(formula)
    if not branches:
        return None
    glyph_names = {"\"██\"": "full block", "\"■\"": "square", "\"\"": "empty"}
    return [
        {"when": cond, "then": glyph_names.get(res.strip(), res)}
        for cond, res in branches
    ]

def build_param_catalog(slice_nodes):
    """Pick likely inputs/controls in the slice (with best-effort meanings).

    Args:
        slice_nodes: iterable of (sheet, addr) tuples; duplicates are ignored.

    Returns:
        Up to 12 dicts with keys name, sheet, addr, display_value and
        region_title, ordered by descending fan-out. Ties are ordered by
        (sheet, addr), so the result does not depend on set iteration order.
    """
    params = []
    # Sort the de-duplicated nodes so that equal fan-out entries come out in
    # a reproducible order (the sort below is stable).
    for (s, a) in sorted(set(slice_nodes)):
        m = annotate_node(s, a)
        if m.role not in {"parameter", "control"}:
            continue
        # try to capture small numeric/text constants as display_value
        val = SHEETS[s][a].value
        display = None
        if isinstance(val, (int, float)) and -1000 <= val <= 1000:
            display = val
        elif isinstance(val, str) and not val.startswith("=") and len(val) <= 40:
            display = val
        params.append({
            "name": pretty_name(m),
            "sheet": s, "addr": a,
            "display_value": display,
            "region_title": m.region_title
        })
    # sort by fan-out, highest first
    params.sort(key=lambda p: ref_by[(p["sheet"], p["addr"])], reverse=True)
    # keep the top 12
    return params[:12]


In [None]:
def make_llm_bundle(target, max_steps=20):
    """Assemble the context dictionary describing how ``target`` is computed.

    Args:
        target: (sheet, addr) of the cell to explain.
        max_steps: maximum number of outline steps to include.

    Returns:
        Dict with the target identity, region title, headers (at most six),
        units, role, nearby text, motif counts, likely controls, decision
        rules for IF formulas, and the outline steps.
    """
    sheet, addr = target
    ws = SHEETS[sheet]

    # Upstream slice, limited to sheets we actually know (defensive)
    slice_order, _subgraph = backward_slice(target)
    known_nodes = [(s, a) for (s, a) in slice_order if s in SHEETS]

    # Annotate, recognise motifs, drop aliases, then render the steps
    node_metas = [annotate_node(s, a) for (s, a) in known_nodes]
    ir_nodes = []
    for node_meta in node_metas:
        if node_meta.formula or (node_meta.sheet, node_meta.addr) == target:
            recognised = recognize_motif(node_meta)
            if recognised:
                ir_nodes.append(recognised)
    ir_nodes = collapse_aliases(ir_nodes)
    outline = group_repeated_products(ir_nodes)[:max_steps]

    target_meta = annotate_node(*target)
    header_texts = [
        h for h in target_meta.headers_above + target_meta.headers_left
        if h and not str(h).startswith("=")
    ]
    nearby = collect_nearby_text(ws, addr)
    controls = build_param_catalog(set(known_nodes))
    rules = extract_case_mapping(target_meta)

    # small motif summary for analyst context
    kind_counts = collections.Counter(node.kind for node in ir_nodes)

    return {
        "target": {"sheet": sheet, "addr": addr, "pretty_name": pretty_name(target_meta)},
        "region_title": target_meta.region_title,
        "headers": header_texts[:6],
        "units": target_meta.units,
        "role": target_meta.role,
        "nearby_text": nearby,
        "motif_counts": dict(kind_counts),
        "controls": controls,            # likely inputs/controls
        "decision_rules": rules,         # for IF/CASE displays
        "outline_steps": outline,        # collapsed steps, capped at max_steps
    }

In [None]:
# Build the context bundle for the chosen target cell and show it as JSON
# (ensure_ascii=False keeps non-ASCII characters readable in the output).
TARGET = ("O","W18")
BUNDLE = make_llm_bundle(TARGET, max_steps=25)
print(json.dumps(BUNDLE, indent=2, ensure_ascii=False))

In [None]:
from genAI_utils.ask_gpt import get_chatgpt_response
from genAI_utils.air_genai_helper import initialize_urls_and_key, gpt_environment

vault_url, token_provider, azure_endpoint = initialize_urls_and_key('ts', 'eastus2')
client = gpt_environment(vault_url, token_provider, azure_endpoint).client

LLM_SYSTEM = """You're an expert on HEA and the Livelihoods Impact Analysis Spreadsheet (LIAS). You're analyzing the Excel-derived algorithm bundles for the LIAS and writing short, grounded explanations.
Be concise, correct, and avoid speculation. Use only provided context. If something is unknown, say so."""

# Embed the bundle as JSON (the same serialisation that is printed when the
# bundle is built) rather than the Python repr of the dict, so the model
# receives well-formed, unambiguous structure.
# NOTE: this is an f-string, so the prompt is fixed when this cell runs;
# re-run the cell after rebuilding BUNDLE.
LLM_USER_TMPL = f"""
Explain in a narrative context how the target output is computed, based on the provided context.

{json.dumps(BUNDLE, indent=2, ensure_ascii=False)}
"""

In [24]:
# Tracking variables
messages = []
total_tokens = 0
prompt_tokens = 0
completion_tokens = 0
all_responses = []

# Conversation: system context first, then the user prompt
messages.append({"role": "system", "content": LLM_SYSTEM})
messages.append({"role": "user", "content": LLM_USER_TMPL})


all_params = {
    "model": 'gpt-4o',
    "messages": messages,
    "temperature": 0,
}

# Remove None values from the parameters so optional settings that were left
# unset are not sent to the API
completion_params = {
    k: v for k, v in all_params.items() if v is not None
}

response = client.chat.completions.create(**completion_params)
text = response.choices[0].message.content

print(text)

The target output is located in sheet 'O' at cell 'W18' and is referred to as 'O!W18'. This output is derived from various inputs and calculations across multiple sheets in the spreadsheet.

The computation involves several steps and controls:
1. **Controls**: Various cells from different sheets (e.g., 'B!FL233', 'S!V4', 'S!N74', etc.) provide input values that influence the calculation. These controls have specific display values that are used in the computation.
2. **Decision Rules**: There is a conditional rule based on the value in cell 'B!IC287'. If 'B!IC287' equals 2, it results in "██", and if it equals 1, it results in "■".
3. **Outline Steps**: The computation involves summing values from specific ranges and multiplying by a control value (e.g., 'P!I9 := SUM(I34)*$D9'). These steps are repeated for various columns (I, K, Q, O, H, N, etc.) across different sheets.
4. **Region Title**: The region title is determined by the value in 'B!IC286'. If 'B!IC286' equals 2, it results in