# Phase 1 — Config-Driven LP + Corrected Validator

Fill the CSVs with real values; do not use fabricated coefficients. Validate production-only metrics, and view totals that include exogenous incomes separately.

In [1]:
import json, yaml
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from pathlib import Path
import numpy as np, pandas as pd
from scipy.optimize import linprog
import ace_tools_open as tools
display_dataframe_to_user = tools.display_dataframe_to_user
# Load the notebook configuration. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it to the garbage collector.
with open('config.yaml', 'r') as _cfg_file:
    CFG = yaml.safe_load(_cfg_file)
# Directory holding the input CSVs read by the loading statements below.
DATA = Path('./cfg_data')
def must_have(path):
    """Return ``path`` unchanged when it exists on disk.

    Args:
        path: A ``pathlib.Path``-like object exposing ``exists()``.

    Returns:
        The same ``path`` object that was passed in.

    Raises:
        FileNotFoundError: If ``path.exists()`` is false.
    """
    if path.exists():
        return path
    raise FileNotFoundError(f'Missing required file: {path}')
# Read every required input table from DATA. must_have() raises
# FileNotFoundError before pandas is called if a file is absent, so a missing
# input fails with a message naming the exact path.
hh_df = pd.read_csv(must_have(DATA / 'households.csv'))
crops_df = pd.read_csv(must_have(DATA / 'crops.csv'))
livest_df = pd.read_csv(must_have(DATA / 'livestock.csv'))
prices_df = pd.read_csv(must_have(DATA / 'prices.csv'))
exog_df = pd.read_csv(must_have(DATA / 'exogenous_income.csv'))
obs_prod_df = pd.read_csv(must_have(DATA / 'observed_prod_only.csv'))


# Remove blank or whitespace-only name rows and trim names
def _clean(df):
    if "name" in df.columns:
        df["name"] = df["name"].astype(str).str.strip()
        df = df[df["name"] != ""]
    return df


# Only the three name-keyed tables are passed through _clean.
hh_df, crops_df, livest_df = (_clean(frame) for frame in (hh_df, crops_df, livest_df))

# Show each loaded table, preferring the rich viewer and falling back to the
# plain notebook display if the viewer raises.
loaded_tables = {
    'households.csv': hh_df,
    'crops.csv': crops_df,
    'livestock.csv': livest_df,
    'prices.csv': prices_df,
    'exogenous_income.csv': exog_df,
    'observed_prod_only.csv': obs_prod_df,
}
for name, df in loaded_tables.items():
    try:
        display_dataframe_to_user(name, df)
    except Exception:
        display(df)
print('Loaded data files. Fill blanks if any.')

households.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


crops.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


livestock.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


prices.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


exogenous_income.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


observed_prod_only.csv


0
Loading ITables v2.5.2 from the internet...  (need help?)


Loaded data files. Fill blanks if any.


In [2]:
# Self-contained schemas + loader (safe to run standalone)
import yaml, pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from pathlib import Path

# === Dataclasses ===
@dataclass
class HouseholdClass:
    """Parameters of one household class, keyed by ``name`` in ModelParams.households."""
    name: str
    # Loaded from households.csv; not read by solve_lp in this notebook.
    n_households: float
    # Scales the household's annual calorie requirement in solve_lp.
    adult_equiv: float
    # Right-hand side of the household labor constraint in solve_lp.
    labor_endowment: float
    # Upper bound on the household's total cropped area in solve_lp.
    land_available: float
    # Upper bound on the household's hired-labor variable in solve_lp.
    max_hired_labor: float

@dataclass
class CropParam:
    """Parameters of one crop, keyed by ``name`` in ModelParams.crops."""
    name: str
    # Coefficient on consumed quantity in the calorie constraint of solve_lp.
    calorie_per_kg: float
    # Output per unit of area; links area to consumed + sold + stored in solve_lp.
    yield_per_ha: float
    # Revenue per unit sold (enters the objective with a negative sign).
    price_sale: float
    # The three per-area input costs below are summed into the area cost.
    seed_cost_per_ha: float
    fert_cost_per_ha: float
    chem_cost_per_ha: float
    # Labor used per unit of area in the household labor constraint.
    labor_req_per_ha: float

@dataclass
class LivestockParam:
    """Parameters of one livestock type, keyed by ``name`` in ModelParams.livestock."""
    name: str
    # Revenue per livestock unit; solve_lp nets it against feed + vet cost.
    price_sale: float
    feed_cost_per_unit: float
    vet_cost_per_unit: float
    # Labor used per livestock unit in the household labor constraint.
    labor_req_per_unit: float

@dataclass
class PriceParam:
    """Market prices shared by all households (currently only the wage)."""
    # Used in solve_lp both as the cost of hired labor and the return on off-farm labor.
    wage: float

@dataclass
class ScenarioShocks:
    """Scenario multipliers; every field defaults to 1.0.

    NOTE(review): presumably each multiplier scales the base parameter its name
    refers to — confirm against the solver before relying on non-default values.
    """
    yield_multiplier: float = 1.0
    crop_price_multiplier: float = 1.0
    wage_multiplier: float = 1.0
    fert_price_multiplier: float = 1.0
    population_multiplier: float = 1.0

@dataclass
class ModelParams:
    """Bundle of all inputs consumed by solve_lp."""
    # Each mapping is keyed by the entity's ``name`` (built that way in load_params).
    households: Dict[str, HouseholdClass]
    crops: Dict[str, CropParam]
    livestock: Dict[str, LivestockParam]
    prices: PriceParam
    # Multiplied by adult_equiv and days_per_year to get a household's annual kcal need.
    min_kcal_per_person_per_day: float
    days_per_year: int

def load_params(CFG, hh_df, crops_df, livest_df, prices_df, exog_df) -> Tuple[ModelParams, ScenarioShocks, pd.DataFrame]:
    """Build typed model parameters from the loaded CSV frames.

    When ``CFG['demo_mode']`` is truthy, blanks in the household, crop and
    livestock tables (and a missing wage) are filled from
    ``./cfg_data/demo_defaults.yaml``. Otherwise the CSV values are used as-is
    and any blank required value raises.

    Args:
        CFG: Mapping with ``min_kcal_per_person_per_day`` and ``days_per_year``,
            and optionally ``demo_mode``.
        hh_df, crops_df, livest_df: Tables with a ``name`` column plus the
            numeric columns of the matching dataclass.
        prices_df: Table whose first row carries ``wage``.
        exog_df: Exogenous-income table; returned unchanged.

    Returns:
        ``(params, scenario, exog_df)`` where ``scenario`` is a default
        ``ScenarioShocks()``.

    Raises:
        ValueError: If a required numeric value (including the wage) is missing
            or blank.
    """
    demo_mode = bool(CFG.get('demo_mode', False))
    demo = None
    if demo_mode:
        # Context manager so the handle is closed deterministically.
        with open('./cfg_data/demo_defaults.yaml', 'r') as demo_file:
            demo = yaml.safe_load(demo_file)

    def is_blank(x):
        # Blank CSV cells arrive as NaN, whose str() is 'nan' rather than '',
        # so missing-ness has to be tested explicitly.
        return pd.isna(x) or str(x).strip() == ''

    def normalize_names(df):
        if 'name' in df.columns:
            df = df.copy()
            df['name'] = df['name'].astype(str).str.strip()
        return df

    # Helper: merge CSV with demo on 'name' and fill blanks from demo
    def fill_from_demo(csv_df, demo_list, required_cols):
        csv_df = normalize_names(csv_df)
        demo_df = pd.DataFrame(demo_list) if demo_list is not None else pd.DataFrame(columns=['name'])
        demo_df = normalize_names(demo_df)

        if (not demo_mode) or (csv_df.empty and demo_df.empty):
            return csv_df

        # If CSV missing 'name' or empty, return full demo
        if ('name' not in csv_df.columns) or csv_df.empty or csv_df['name'].eq('').all():
            return demo_df

        merged = pd.merge(demo_df, csv_df, on='name', how='outer', suffixes=('_demo', ''))
        # for each required col, prefer CSV value, else demo
        for col in required_cols:
            demo_col = f"{col}_demo" if f"{col}_demo" in merged.columns else col
            if col not in merged.columns:
                # Column absent from the CSV: take the demo column if there is
                # one, else mark it all-missing so req_float reports the field
                # by name instead of this helper raising a KeyError.
                merged[col] = merged[demo_col] if demo_col in merged.columns else float('nan')
            else:
                has_value = merged[col].notna() & (merged[col].astype(str).str.strip() != '')
                merged[col] = merged[col].where(has_value, merged[demo_col])
        # keep only required + name
        keep = ['name'] + required_cols
        return merged[keep]

    # Work on copies so the caller's frames are never modified.
    hh_csv = hh_df.copy()
    crops_csv = crops_df.copy()
    livest_csv = livest_df.copy()
    prices_csv = prices_df.copy()

    if demo_mode and demo is not None:
        hh_csv = fill_from_demo(hh_csv, demo.get('households', []), ['n_households','adult_equiv','labor_endowment','land_available','max_hired_labor'])
        crops_csv = fill_from_demo(crops_csv, demo.get('crops', []), ['calorie_per_kg','yield_per_ha','price_sale','seed_cost_per_ha','fert_cost_per_ha','chem_cost_per_ha','labor_req_per_ha'])
        livest_csv = fill_from_demo(livest_csv, demo.get('livestock', []), ['price_sale','feed_cost_per_unit','vet_cost_per_unit','labor_req_per_unit'])
        # prices has single row; if missing wage, take from demo
        if prices_csv.empty or is_blank(prices_csv.iloc[0].get('wage')):
            prices_csv = pd.DataFrame([demo.get('prices', {})])

    def req_float(x, field):
        if is_blank(x):
            raise ValueError(f'Missing value for {field}')
        return float(x)

    households = {r['name']: HouseholdClass(
        name=r['name'],
        n_households=req_float(r['n_households'],'households.n_households'),
        adult_equiv=req_float(r['adult_equiv'],'households.adult_equiv'),
        labor_endowment=req_float(r['labor_endowment'],'households.labor_endowment'),
        land_available=req_float(r['land_available'],'households.land_available'),
        max_hired_labor=req_float(r['max_hired_labor'],'households.max_hired_labor'),
    ) for _, r in hh_csv.iterrows()}

    crops = {r['name']: CropParam(
        name=r['name'],
        calorie_per_kg=req_float(r['calorie_per_kg'],'crops.calorie_per_kg'),
        yield_per_ha=req_float(r['yield_per_ha'],'crops.yield_per_ha'),
        price_sale=req_float(r['price_sale'],'crops.price_sale'),
        seed_cost_per_ha=req_float(r['seed_cost_per_ha'],'crops.seed_cost_per_ha'),
        fert_cost_per_ha=req_float(r['fert_cost_per_ha'],'crops.fert_cost_per_ha'),
        chem_cost_per_ha=req_float(r['chem_cost_per_ha'],'crops.chem_cost_per_ha'),
        labor_req_per_ha=req_float(r['labor_req_per_ha'],'crops.labor_req_per_ha'),
    ) for _, r in crops_csv.iterrows()}

    livest = {r['name']: LivestockParam(
        name=r['name'],
        price_sale=req_float(r['price_sale'],'livestock.price_sale'),
        feed_cost_per_unit=req_float(r['feed_cost_per_unit'],'livestock.feed_cost_per_unit'),
        vet_cost_per_unit=req_float(r['vet_cost_per_unit'],'livestock.vet_cost_per_unit'),
        labor_req_per_unit=req_float(r['labor_req_per_unit'],'livestock.labor_req_per_unit'),
    ) for _, r in livest_csv.iterrows()}

    # An empty prices table or a missing 'wage' column is reported the same way
    # as a blank wage, rather than surfacing as IndexError/KeyError.
    if prices_csv.empty:
        raise ValueError('Missing value for prices.wage')
    prices_row = prices_csv.iloc[0].to_dict()
    prices = PriceParam(wage=req_float(prices_row.get('wage'),'prices.wage'))

    scenario = ScenarioShocks()
    params = ModelParams(households=households, crops=crops, livestock=livest, prices=prices,
                         min_kcal_per_person_per_day=float(CFG['min_kcal_per_person_per_day']), days_per_year=int(CFG['days_per_year']))
    return params, scenario, exog_df


In [3]:
# Seed from demo defaults (optional helper)
# Run this cell once to overwrite CSVs with demo defaults (useful if your CSVs are blank).
import yaml, pandas as pd
from pathlib import Path

# WARNING: this cell overwrites the four input CSVs with demo values.
with open('./cfg_data/demo_defaults.yaml', 'r') as _demo_file:
    demo = yaml.safe_load(_demo_file)

# Seed the same directory the loading cell reads from (./cfg_data). Writing to
# a different folder would leave the loaded CSVs blank. A separate name is used
# so the notebook-wide DATA path is not silently rebound.
SEED_DIR = Path('./cfg_data')

pd.DataFrame(demo['households']).to_csv(SEED_DIR/'households.csv', index=False)
pd.DataFrame(demo['crops']).to_csv(SEED_DIR/'crops.csv', index=False)
pd.DataFrame(demo['livestock']).to_csv(SEED_DIR/'livestock.csv', index=False)
pd.DataFrame([demo['prices']]).to_csv(SEED_DIR/'prices.csv', index=False)

# The in-memory frames (hh_df, crops_df, ...) were read before this cell ran;
# re-run the loading cell to pick up the seeded values.
print('Seeded households.csv, crops.csv, livestock.csv, prices.csv from demo_defaults.yaml')


Seeded households.csv, crops.csv, livestock.csv, prices.csv from demo_defaults.yaml


In [4]:
def build_index_maps(H, C, L):
    """Assign a column position to every decision variable of the LP.

    Variables are laid out in contiguous groups, in this order: ``area``,
    ``cons``, ``sold``, ``stored`` (each household-major over crops), then
    ``hired`` and ``off_farm`` (one per household, third key element ``None``),
    then ``live_units`` (household-major over livestock types).

    Args:
        H: Household names.
        C: Crop names.
        L: Livestock names.

    Returns:
        ``(idx, n)`` where ``idx`` maps ``(kind, household, item)`` to its
        column position and ``n`` is the total number of positions assigned.
    """
    ordered_keys = []
    for kind in ('area', 'cons', 'sold', 'stored'):
        ordered_keys.extend((kind, h, c) for h in H for c in C)
    for kind in ('hired', 'off_farm'):
        ordered_keys.extend((kind, h, None) for h in H)
    ordered_keys.extend(('live_units', h, l) for h in H for l in L)

    idx = {}
    for position, key in enumerate(ordered_keys):
        idx[key] = position
    return idx, len(ordered_keys)
from numpy import zeros
from numpy import array as nparray
def solve_lp(params, scenario):
    """Build and solve the household production LP with HiGHS.

    The objective minimises cost minus revenue: hired-labor wages and per-area
    crop inputs, net of off-farm wages, crop sales and livestock net margin.

    Constraints, per household:
      * crop balance: yield * area == consumed + sold + stored (per crop)
      * land: total area <= land_available
      * labor: crop + livestock labor + off-farm - hired <= labor_endowment
      * hiring cap: hired <= max_hired_labor
      * calories: kcal from consumed crops >= min_kcal * adult_equiv * days

    Scenario multipliers are read from ``scenario``. Yield, crop sale price,
    wage and fertilizer cost are scaled by their respective multipliers; a
    missing attribute (or ``scenario=None``) counts as 1.0, and
    ``population_multiplier`` is not applied here. With the default
    ``ScenarioShocks()`` the problem equals the unscaled one.

    Args:
        params: ``ModelParams`` with households, crops, livestock and prices.
        scenario: Object exposing the ``*_multiplier`` attributes, or ``None``.

    Returns:
        ``(res, idx, H, C, L)``: the ``linprog`` result, the variable index map
        from ``build_index_maps``, and the household/crop/livestock name lists.
    """
    H = list(params.households.keys())
    C = list(params.crops.keys())
    L = list(params.livestock.keys())
    idx, nvars = build_index_maps(H, C, L)

    # Previously these were hard-coded to 1.0, so scenario shocks had no effect.
    ym = float(getattr(scenario, 'yield_multiplier', 1.0))
    pm = float(getattr(scenario, 'crop_price_multiplier', 1.0))
    wm = float(getattr(scenario, 'wage_multiplier', 1.0))
    fm = float(getattr(scenario, 'fert_price_multiplier', 1.0))

    # Objective coefficients (linprog minimises, so revenues are negative).
    c = np.zeros(nvars)
    wage = params.prices.wage * wm
    for h in H:
        c[idx[('hired', h, None)]] += wage
        c[idx[('off_farm', h, None)]] -= wage
        for cn in C:
            cp = params.crops[cn]
            c[idx[('sold', h, cn)]] -= cp.price_sale * pm
            c[idx[('area', h, cn)]] += cp.seed_cost_per_ha + cp.fert_cost_per_ha * fm + cp.chem_cost_per_ha
        for l in L:
            lv = params.livestock[l]
            c[idx[('live_units', h, l)]] += (lv.feed_cost_per_unit + lv.vet_cost_per_unit) - lv.price_sale

    A_eq, b_eq, A_ub, b_ub = [], [], [], []

    # Crop balance: production is split between consumption, sales and storage.
    for h in H:
        for cn in C:
            row = np.zeros(nvars)
            row[idx[('area', h, cn)]] = params.crops[cn].yield_per_ha * ym
            row[idx[('cons', h, cn)]] = -1
            row[idx[('sold', h, cn)]] = -1
            row[idx[('stored', h, cn)]] = -1
            A_eq.append(row); b_eq.append(0.0)

    # Land: total cropped area cannot exceed the household's land.
    for h in H:
        row = np.zeros(nvars)
        for cn in C:
            row[idx[('area', h, cn)]] = 1
        A_ub.append(row); b_ub.append(params.households[h].land_available)

    # Labor: on-farm use plus off-farm work, less hired labor, within endowment.
    for h in H:
        row = np.zeros(nvars)
        for cn in C:
            row[idx[('area', h, cn)]] = params.crops[cn].labor_req_per_ha
        for l in L:
            row[idx[('live_units', h, l)]] = params.livestock[l].labor_req_per_unit
        row[idx[('off_farm', h, None)]] = 1
        row[idx[('hired', h, None)]] = -1
        A_ub.append(row); b_ub.append(params.households[h].labor_endowment)

    # Cap on hired labor.
    for h in H:
        row = np.zeros(nvars)
        row[idx[('hired', h, None)]] = 1
        A_ub.append(row); b_ub.append(params.households[h].max_hired_labor)

    # Calorie floor, written as -kcal_consumed <= -kcal_need.
    for h in H:
        row = np.zeros(nvars)
        for cn in C:
            row[idx[('cons', h, cn)]] = -params.crops[cn].calorie_per_kg
        kcal_need = params.min_kcal_per_person_per_day * params.households[h].adult_equiv * params.days_per_year
        A_ub.append(row); b_ub.append(-kcal_need)

    # An empty row list would become a 1-D array; pass None so linprog treats
    # the constraint set as absent (e.g. when there are no crops).
    res = linprog(
        c,
        A_ub=np.vstack(A_ub) if A_ub else None,
        b_ub=np.asarray(b_ub) if A_ub else None,
        A_eq=np.vstack(A_eq) if A_eq else None,
        b_eq=np.asarray(b_eq) if A_eq else None,
        bounds=[(0, None)] * nvars,
        method='highs',
    )
    return res, idx, H, C, L

In [5]:
# Validate config & data before solving
import pandas as pd, numpy as np

issues = []

# Check names in households. Missing names are tested with isna() as well,
# because a blank CSV cell is NaN and its string form is not ''.
if hh_df.empty or 'name' not in hh_df.columns:
    issues.append('households.csv is empty or missing name column')
else:
    hh_names = hh_df['name']
    if (hh_names.isna() | hh_names.astype(str).str.strip().eq('')).any():
        issues.append('households.csv has blank name entries')

# Check crops
if crops_df.empty or 'name' not in crops_df.columns:
    issues.append('crops.csv is empty or missing name column')
# Check livestock
if livest_df.empty or 'name' not in livest_df.columns:
    issues.append('livestock.csv is empty or missing name column')

# Check wage: an empty table, a missing 'wage' column, a NaN cell and a
# whitespace-only cell all count as "missing".
if prices_df.empty or 'wage' not in prices_df.columns:
    wage_value = None
else:
    wage_value = prices_df.iloc[0]['wage']
if pd.isna(wage_value) or str(wage_value).strip() == '':
    issues.append('prices.csv missing wage')

# Print issues or OK
if issues:
    print('⚠️ Please fix before solving:')
    for i in issues:
        print(' -', i)
else:
    print('✅ Basic structure checks passed. If demo_mode:true, blanks will be filled from demo_defaults.yaml.')


✅ Basic structure checks passed. If demo_mode:true, blanks will be filled from demo_defaults.yaml.


In [6]:
# Build parameters, solve the LP and stop early if the solver did not succeed.
params, scenario, exog = load_params(CFG, hh_df, crops_df, livest_df, prices_df, exog_df)
res, idx, H, C, L = solve_lp(params, scenario)
print('Status:', res.message)
print('Success:', bool(res.success))
if not res.success:
    raise RuntimeError('Optimization failed — check CSV inputs.')
x = res.x


def val(kind, h, k=None):
    """Return the solved value of variable (kind, h, k), or 0.0 if it does not exist."""
    key = (kind, h, k)
    return float(x[idx[key]]) if key in idx else 0.0


# Production-only revenue and cost lines per household class.
rows = []
for h in H:
    rev_crops = sum(val('sold', h, cn)*params.crops[cn].price_sale for cn in C)
    cost_crop_inputs = sum(
        val('area', h, cn)*(params.crops[cn].seed_cost_per_ha + params.crops[cn].fert_cost_per_ha + params.crops[cn].chem_cost_per_ha)
        for cn in C
    )
    rev_livestock = sum(val('live_units', h, l)*params.livestock[l].price_sale for l in L)
    cost_livestock = sum(
        val('live_units', h, l)*(params.livestock[l].feed_cost_per_unit + params.livestock[l].vet_cost_per_unit)
        for l in L
    )
    off = val('off_farm', h)*params.prices.wage
    hired = val('hired', h)*params.prices.wage
    rows.append({'household_class':h,'rev_crops':rev_crops,'rev_livestock':rev_livestock,'off_farm_labor':off,'cost_crop_inputs':cost_crop_inputs,'cost_livestock':cost_livestock,'cost_hired_labor':hired})
model_df = pd.DataFrame(rows)

# load_params hands back the very frame it was given, so copy before adding a
# column; otherwise the loaded exog_df is modified behind the reader's back.
exog = exog.copy()
exog['exogenous_total'] = exog[['gov_transfer','own_business','private_transfers','psnp','rent_out']].sum(axis=1)
ctx = model_df.merge(exog[['household_class','exogenous_total']], on='household_class', how='left')
# Gross income view: production revenues + off-farm wages + exogenous income (costs not netted).
ctx['income_total_incl_exogenous'] = ctx['rev_crops']+ctx['rev_livestock']+ctx['off_farm_labor']+ctx['exogenous_total']
try:
    display_dataframe_to_user('Model (production-only) + exogenous totals', ctx)
except Exception:
    display(ctx)

# Make sure the output folder exists so the export works on a fresh checkout.
_ctx_path = Path('./outputs/model_context.json')
_ctx_path.parent.mkdir(parents=True, exist_ok=True)
ctx.to_json(_ctx_path, orient='records', indent=2)
print('Saved ./outputs/model_context.json')

ValueError: Missing value for households.n_households

In [None]:
# Corrected Validator
# Compares the model's production-only metrics with observed values per
# household class, then summarises the error per metric (RMSE and MAPE).
# The cell source previously held literal "\n" escapes on one comment line, so
# none of this code ever ran.
# NOTE(review): the observed file is read from ./data here, while the loading
# cell reads observed_prod_only.csv from ./cfg_data — confirm which is intended.
obs = pd.read_csv('./data/observed_prod_only.csv')
metrics = ['rev_crops','rev_livestock','off_farm_labor','cost_crop_inputs','cost_livestock','cost_hired_labor']
merged = model_df.merge(obs[['household_class']+metrics], on='household_class', suffixes=('_model','_obs'), how='outer')

import numpy as np


def pct_diff(m, o):
    """Percent difference of model ``m`` from observed ``o``.

    Returns NaN if either value is missing. When the observed value is 0, the
    result is 0.0 if the model value is also 0 and ``inf`` otherwise.
    """
    if pd.isna(m) or pd.isna(o):
        return float('nan')
    if o == 0:
        return float('inf') if m != 0 else 0.0
    return 100.0 * (m - o) / abs(o)


# Long-format comparison: one row per (household class, metric).
rows = []
for _, r in merged.iterrows():
    for m in metrics:
        model_value = r[f'{m}_model']
        obs_value = r[f'{m}_obs']
        both_present = pd.notna(model_value) and pd.notna(obs_value)
        rows.append({
            'household_class': r['household_class'],
            'metric': m,
            'observed': obs_value,
            'model': model_value,
            'diff': (model_value - obs_value) if both_present else float('nan'),
            'pct_diff_%': pct_diff(model_value, obs_value),
        })
comp = pd.DataFrame(rows)
try:
    display_dataframe_to_user('Validator — Production-only metrics', comp)
except Exception:
    display(comp)

# Per-metric summary over rows where both observed and model values exist.
agg = []
for m in metrics:
    sub = comp[comp['metric']==m].copy().replace([np.inf,-np.inf], np.nan).dropna(subset=['observed','model'])
    if len(sub) == 0:
        rmse = mape = np.nan
    else:
        rmse = float(np.sqrt(np.mean((sub['model']-sub['observed'])**2)))
        # Zero observations are excluded from MAPE to avoid division by zero.
        denom = sub['observed'].replace(0, np.nan)
        mape = float(np.mean(np.abs((sub['model']-sub['observed'])/denom))*100.0)
    agg.append({'metric':m,'RMSE':rmse,'MAPE_%':mape})
agg_df = pd.DataFrame(agg)
try:
    display_dataframe_to_user('Validator — Summary (production-only)', agg_df)
except Exception:
    display(agg_df)