In [10]:
import pandas as pd
import numpy as np
import os
import shutil
import time
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm import tqdm
from datetime import datetime
import glob

# ==========================================
# 1. CONFIGURATION & SCHEMA
# ==========================================

# Input CSV paths; each one is read by load_and_validate_dataset via REQUIRED_SCHEMA[...]['file'].
LOCATIONS_FILE = 'locations_dummy.csv'
ALLOCATIONS_FILE = 'allocations.csv'
PARTS_FILE = 'synthetic_parts_generated.csv'
# Folder that receives CSV/PNG/TXT results. setup_environment() deletes and recreates it on every run.
OUTPUT_DIR = 'validation_results'
# Time budget per check: estimate_and_sample() validates a random sample instead of
# the full dataset when its estimated runtime exceeds this many seconds.
MAX_EXECUTION_TIME_SEC = 300

# For each dataset: the file to load and the column names that must be present.
# A dataset missing any listed column is flagged invalid and the run stops.
REQUIRED_SCHEMA = {
    'LOCATIONS': {
        'file': LOCATIONS_FILE,
        # Bin dimensions (width/depth/height) and position (x/y/z) are required by
        # the geometry checks; make sure the locations file actually has these columns.
        'columns': ['loc_inst_code', 'width', 'depth', 'height', 'x', 'y', 'z']
    },
    'ALLOCATIONS': {
        'file': ALLOCATIONS_FILE,
        # Column list for the current allocations export format.
        'columns': ['loc_inst_code', 'LOCATION_TYPE', 'ITEM_ID', 
                    'QTY_ALLOCATED', 'MAX_UNITS', 
                    'GRID_X', 'GRID_Y', 'GRID_Z', 
                    'FULL_LAYERS', 'PARTIAL_UNITS',
                    'ORIENT_X_MM', 'ORIENT_Y_MM', 'ORIENT_Z_MM', 
                    'LOCATION_VOL_MM3', 'LOCATION_VOL_M3', 
                    'STORED_VOL_M3', 'UTILIZATION_PCT'] 
    },
    'PARTS': {
        'file': PARTS_FILE,
        # Part dimensions, weight and stock figures, keyed by ITEM_ID.
        'columns': ['ITEM_ID', 'LEN_MM', 'WID_MM', 'DEP_MM', 'WT_KG', 
                    'BOXES_ON_HAND', 'QTY_PER_BOX']
    }
}

# ==========================================
# 2. LOGGING & UTILS
# ==========================================

class Colors:
    """ANSI escape sequences used by log() to colourise console output."""

    # Text attributes
    BOLD = '\033[1m'
    RESET = '\033[0m'

    # Foreground colours
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    CYAN = '\033[96m'

# Every message passed to log() is appended here (timestamped, without colour codes);
# run_full_diagnostic() writes the accumulated lines to validation_report.txt.
report_buffer = []

def log(message):
    """Record *message* in the report buffer and print a colourised copy.

    The plain, timestamped line is appended to ``report_buffer``. The printed
    copy is decorated according to the first keyword found, checked in this
    order: PASS, FAIL, CRITICAL, WARN, then the ``---`` section marker.
    PASS/FAIL/WARN highlight only the keyword; CRITICAL and ``---`` colour the
    whole line.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    report_buffer.append(f"[{stamp}] {message}")

    def emphasise(word, colour):
        # Wrap each occurrence of the keyword in bold + colour.
        return message.replace(word, f"{colour}{Colors.BOLD}{word}{Colors.RESET}")

    if "PASS" in message:
        shown = emphasise("PASS", Colors.GREEN)
    elif "FAIL" in message:
        shown = emphasise("FAIL", Colors.RED)
    elif "CRITICAL" in message:
        shown = f"{Colors.RED}{Colors.BOLD}{message}{Colors.RESET}"
    elif "WARN" in message:
        shown = emphasise("WARN", Colors.YELLOW)
    elif "---" in message:
        shown = f"{Colors.CYAN}{message}{Colors.RESET}"
    else:
        shown = message

    print(f"[{stamp}] {shown}")

def setup_environment():
    """Start each run with an empty output folder.

    Any existing ``OUTPUT_DIR`` tree is removed, then the folder is created
    again and a confirmation line is logged.
    """
    stale_results_present = os.path.exists(OUTPUT_DIR)
    if stale_results_present:
        # Wipe results from a previous run so old CSVs/plots cannot be mistaken for new ones.
        shutil.rmtree(OUTPUT_DIR)

    os.makedirs(OUTPUT_DIR)
    log(f"Output folder '{OUTPUT_DIR}' ready.")

# ==========================================
# 3. DATA LOADING
# ==========================================

def load_and_validate_dataset(key, config):
    """Read one CSV as strings and verify that its required columns exist.

    Args:
        key: Dataset label used in log messages (e.g. 'LOCATIONS').
        config: Mapping with 'file' (path to the CSV) and 'columns'
            (column names that must be present).

    Returns:
        ``(df, True)`` when the file loads and has every required column,
        ``(df, False)`` when it loads but columns are missing, and
        ``(None, False)`` when the file is absent or cannot be read.
    """
    path = config['file']
    log(f"--- Loading {key} ({path}) ---")

    if os.path.exists(path):
        try:
            # sep=None lets the python engine sniff the delimiter; everything is
            # kept as text here and converted to numbers later by the caller.
            frame = pd.read_csv(path, sep=None, engine='python', dtype=str)

            # Trim header whitespace and drop a leading 'ï»¿' prefix
            # (looks like a UTF-8 BOM decoded with a single-byte codec).
            trimmed = frame.columns.str.strip()
            frame.columns = trimmed.str.replace('^ï»¿', '', regex=True)

            # Row identifier: positional index + 2 (presumably the 1-based
            # line number in the file, counting the header row).
            frame['ROW_ID'] = frame.index + 2

            absent = [name for name in config['columns'] if name not in frame.columns]
            schema_ok = not absent
            if schema_ok:
                log(f"SUCCESS: {key} loaded ({len(frame)} rows).")
            else:
                log(f"CRITICAL SCHEMA ERROR in {key}. Missing: {absent}")
            return frame, schema_ok
        except Exception as e:
            log(f"CRITICAL ERROR reading {path}: {e}")
            return None, False

    log(f"CRITICAL: File {path} not found.")
    return None, False

def convert_numeric(df, cols):
    """Convert the listed columns of *df* to numbers, in place.

    Values that cannot be parsed (and missing values) become 0. Names in
    *cols* that are not columns of *df* are ignored. The same DataFrame
    object is returned for convenience.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        parsed = pd.to_numeric(df[name], errors='coerce')
        df[name] = parsed.fillna(0)
    return df

def load_all_data():
    """Reset the output folder, then load and type-convert all three datasets.

    Returns:
        ``(datasets, valid_flags)``: two dicts keyed by 'LOCATIONS',
        'ALLOCATIONS' and 'PARTS'. ``datasets`` holds the loaded DataFrame
        (or None when loading failed); ``valid_flags`` holds the schema
        validation result. Numeric conversion is applied only to datasets
        that passed validation.
    """
    setup_environment()

    # Columns to convert to numbers, per dataset, in load order.
    numeric_columns = {
        'LOCATIONS': ['width', 'depth', 'height', 'x', 'y', 'z'],
        'ALLOCATIONS': ['GRID_X', 'GRID_Y', 'GRID_Z',
                        'ORIENT_X_MM', 'ORIENT_Y_MM', 'ORIENT_Z_MM',
                        'MAX_UNITS', 'QTY_ALLOCATED',
                        'LOCATION_VOL_MM3', 'UTILIZATION_PCT'],
        'PARTS': ['LEN_MM', 'WID_MM', 'DEP_MM', 'WT_KG',
                  'BOXES_ON_HAND', 'QTY_PER_BOX'],
    }

    datasets = {}
    valid_flags = {}
    for key, columns in numeric_columns.items():
        frame, is_valid = load_and_validate_dataset(key, REQUIRED_SCHEMA[key])
        if is_valid:
            frame = convert_numeric(frame, columns)
        datasets[key] = frame
        valid_flags[key] = is_valid

    return datasets, valid_flags

# ==========================================
# 4. VALIDATION LOGIC
# ==========================================

def estimate_and_sample(df, check_name, validation_func, *args):
    """Run a row-wise check, falling back to a random sample if it would be too slow.

    The check is first timed on the first ``min(1000, len(df))`` rows with
    ``quiet=True``. That timing is extrapolated linearly to the whole
    frame; if the estimate exceeds ``MAX_EXECUTION_TIME_SEC`` only a
    reproducible random sample (``random_state=42``) that should fit in the
    budget is validated, otherwise the full frame is.

    Args:
        df: DataFrame handed to ``validation_func`` as its first argument.
        check_name: Label used in log messages.
        validation_func: Callable ``f(df, *args, quiet=...)`` returning a
            DataFrame of issues (empty when everything is fine).
        *args: Extra positional arguments forwarded to ``validation_func``.

    Returns:
        The issues DataFrame produced by ``validation_func``. An empty input
        yields an empty DataFrame. If the check itself raises, a one-row
        DataFrame (``Issue == 'Check Error'``) is returned so that callers
        testing ``.empty`` do not report a crashed check as passed.
    """
    log(f"Starting {check_name}...")
    total = len(df)
    test_size = min(1000, total)
    if test_size == 0:
        return pd.DataFrame()

    def _check_error(exc):
        # A crashed check must surface as a finding, not as an empty (passing) result.
        log(f"ERROR in {check_name}: {exc}")
        return pd.DataFrame([{'Issue': 'Check Error', 'Details': f"{check_name}: {exc}"}])

    # perf_counter is monotonic and high-resolution, so very short trial runs
    # are still measurable.
    t0 = time.perf_counter()
    try:
        validation_func(df.head(test_size), *args, quiet=True)
    except Exception as e:
        return _check_error(e)

    dur = time.perf_counter() - t0
    if dur <= 0:
        dur = 0.001
    est_time = (dur / test_size) * total
    log(f"   Est. time: {est_time:.2f}s")

    if est_time > MAX_EXECUTION_TIME_SEC:
        safe_rows = int((MAX_EXECUTION_TIME_SEC / dur) * test_size)
        # Always validate at least one row and never ask for more rows than exist.
        safe_rows = max(1, min(safe_rows, total))
        log(f"   WARN: Time limit exceeded. Sampling {safe_rows} rows.")
        df_to_process = df.sample(n=safe_rows, random_state=42)
    else:
        df_to_process = df

    try:
        return validation_func(df_to_process, *args, quiet=False)
    except Exception as e:
        # Rows beyond the timed prefix can still trigger failures.
        return _check_error(e)

# --- CHECK: Stated Utilization Accuracy ---
def check_stated_utilization(df_alloc, df_loc, quiet=False):
    """Compare each allocation's stated UTILIZATION_PCT with a recomputed value.

    The recomputed percentage is (volume of one oriented unit * QTY_ALLOCATED)
    divided by the bin volume (width * depth * height). Allocations with no
    matching location, or whose bin volume is not positive, are skipped.

    Args:
        df_alloc: Allocations, joined to locations on 'loc_inst_code'.
        df_loc: Locations providing 'width', 'depth' and 'height'.
        quiet: When True, no log lines and no progress bar are produced.

    Returns:
        DataFrame with one row per allocation whose stated and recomputed
        percentages differ by more than 1.0; empty if there are none or if
        the UTILIZATION_PCT column is absent.
    """
    col_name = 'UTILIZATION_PCT'

    if col_name not in df_alloc.columns:
        if not quiet:
            log(f"WARN: Column '{col_name}' not found.")
        return pd.DataFrame()

    if not quiet:
        log(f"INFO: Checking utilization accuracy using column: '{col_name}'")

    joined = df_alloc.merge(df_loc, on='loc_inst_code', how='left', suffixes=('', '_LOC'))

    rows = joined.iterrows()
    if not quiet:
        rows = tqdm(rows, total=joined.shape[0])

    findings = []
    for _, rec in rows:
        # No matching location row -> nothing to compare against.
        if pd.isna(rec['width']):
            continue

        # Volume actually occupied: one unit in its stored orientation,
        # multiplied by the quantity in the bin (not the grid capacity).
        unit_volume = rec['ORIENT_X_MM'] * rec['ORIENT_Y_MM'] * rec['ORIENT_Z_MM']
        occupied_volume = unit_volume * rec['QTY_ALLOCATED']

        bin_volume = rec['width'] * rec['depth'] * rec['height']
        if bin_volume <= 0:
            continue

        computed = (occupied_volume / bin_volume) * 100
        stated = rec[col_name]

        # Allow up to one percentage point of disagreement.
        if abs(computed - stated) > 1.0:
            findings.append({
                'ROW_ID': rec['ROW_ID'],
                'loc_inst_code': rec['loc_inst_code'],
                'Issue': 'Utilization Data Mismatch',
                'Details': f"Stated: {stated:.1f}% != Calc: {computed:.1f}% (Qty {rec['QTY_ALLOCATED']} * Vol / Bin Vol)"
            })

    return pd.DataFrame(findings)

# --- CHECK: Stack vs Bin Dimensions (Geometric Fit) ---
def check_stack_fit(df_alloc, df_loc, quiet=False):
    """Flag allocations whose stacked grid is larger than the bin on any axis.

    The stack extent on each axis is GRID_* multiplied by ORIENT_*_MM; it is
    compared with the bin's width (X), depth (Y) and height (Z) plus a
    tolerance of 1.0. Allocations with no matching location are skipped.

    Args:
        df_alloc: Allocations, joined to locations on 'loc_inst_code'.
        df_loc: Locations providing 'width', 'depth' and 'height'.
        quiet: When True, no progress bar is shown.

    Returns:
        DataFrame with one 'Stack Exceeds Bin' row per offending allocation.
    """
    joined = df_alloc.merge(df_loc, on='loc_inst_code', how='left', suffixes=('', '_LOC'))

    rows = joined.iterrows()
    if not quiet:
        rows = tqdm(rows, total=joined.shape[0])

    tolerance = 1.0
    findings = []
    for _, rec in rows:
        if pd.isna(rec['width']):
            continue

        stack_x = rec['GRID_X'] * rec['ORIENT_X_MM']
        stack_y = rec['GRID_Y'] * rec['ORIENT_Y_MM']
        stack_z = rec['GRID_Z'] * rec['ORIENT_Z_MM']

        # (stack extent, bin extent) per axis.
        axis_pairs = (
            (stack_x, rec['width']),
            (stack_y, rec['depth']),
            (stack_z, rec['height']),
        )
        if any(extent > limit + tolerance for extent, limit in axis_pairs):
            findings.append({
                'ROW_ID': rec['ROW_ID'],
                'loc_inst_code': rec['loc_inst_code'],
                'ITEM_ID': rec['ITEM_ID'],
                'Issue': 'Stack Exceeds Bin',
                'Details': f"Bin: {rec['width']}x{rec['depth']}x{rec['height']} | Stack: {stack_x:.1f}x{stack_y:.1f}x{stack_z:.1f}"
            })

    return pd.DataFrame(findings)

# --- CHECK: Inventory Balance (Partial Unallocated) ---
def check_inventory_balance(df_alloc, df_parts):
    """Compare allocated quantity per item with the boxes on hand.

    Demand is counted in boxes (BOXES_ON_HAND), not in box contents, so
    QTY_PER_BOX is deliberately not applied. For every item in *df_parts*
    the summed QTY_ALLOCATED is compared with its demand using a 0.01
    tolerance. Mismatches are written to ``fail_inventory_balance.csv`` in
    ``OUTPUT_DIR`` and a PASS/FAIL line is logged.

    Neither input frame is modified: numeric coercion is done on local
    copies so later checks see the caller's data unchanged.

    Args:
        df_alloc: Allocations with 'ITEM_ID' and 'QTY_ALLOCATED'.
        df_parts: Parts with 'ITEM_ID' and 'BOXES_ON_HAND'.

    Returns:
        None.
    """
    log("--- Check: Inventory Balance (Partial Unallocated) ---")

    # Coerce demand the same way as the allocated quantity so that text
    # values cannot break the comparisons below.
    boxes = pd.to_numeric(df_parts['BOXES_ON_HAND'], errors='coerce').fillna(0)
    # If an ITEM_ID repeats, the last row's value is used.
    demand_map = dict(zip(df_parts['ITEM_ID'], boxes))

    allocated_qty = df_alloc[['ITEM_ID', 'QTY_ALLOCATED']].copy()
    allocated_qty['QTY_ALLOCATED'] = pd.to_numeric(
        allocated_qty['QTY_ALLOCATED'], errors='coerce'
    ).fillna(0)
    alloc_sums = allocated_qty.groupby('ITEM_ID')['QTY_ALLOCATED'].sum().to_dict()

    issues = []
    for item_id, demand in demand_map.items():
        allocated = alloc_sums.get(item_id, 0)

        # Small tolerance guards against float round-off in the sums.
        if allocated < (demand - 0.01):
            issues.append({'ITEM_ID': item_id, 'Issue': 'Partial/No Allocation', 'Demand': demand, 'Allocated': allocated, 'Missing': demand - allocated})
        elif allocated > (demand + 0.01):
            issues.append({'ITEM_ID': item_id, 'Issue': 'Over-Allocation', 'Demand': demand, 'Allocated': allocated, 'Excess': allocated - demand})

    df_issues = pd.DataFrame(issues)
    if not df_issues.empty:
        log(f"FAIL: Found {len(df_issues)} Items with inventory mismatches.")
        df_issues.to_csv(f"{OUTPUT_DIR}/fail_inventory_balance.csv", index=False)
    else:
        log("PASS: Total allocated boxes match inventory on hand.")

# --- CHECK: Volume Data Integrity ---
def check_volume_data_integrity(df_alloc, df_loc, quiet=False):
    """Verify that LOCATION_VOL_MM3 equals width * depth * height of the bin.

    Allocations are joined to locations on 'loc_inst_code'; rows with no
    matching location are skipped. A difference greater than 1.0 between the
    stated and recomputed volume is reported.

    Args:
        df_alloc: Allocations carrying 'LOCATION_VOL_MM3'.
        df_loc: Locations providing 'width', 'depth' and 'height'.
        quiet: When True, no progress bar is shown.

    Returns:
        DataFrame with one 'Volume Data Error' row per mismatch.
    """
    joined = df_alloc.merge(df_loc, on='loc_inst_code', how='left', suffixes=('', '_LOC'))

    rows = joined.iterrows()
    if not quiet:
        rows = tqdm(rows, total=joined.shape[0])

    findings = []
    for _, rec in rows:
        if pd.isna(rec['width']):
            continue

        real_vol = rec['width'] * rec['depth'] * rec['height']
        stated_vol = rec['LOCATION_VOL_MM3']
        if abs(real_vol - stated_vol) <= 1.0:
            continue

        findings.append({
            'ROW_ID': rec['ROW_ID'],
            'loc_inst_code': rec['loc_inst_code'],
            'Issue': 'Volume Data Error',
            'Details': f"Stated: {rec['LOCATION_VOL_MM3']} != Real: {real_vol}"
        })

    return pd.DataFrame(findings)

# --- CHECK: Grid Math ---
def check_grid_consistency(df_alloc, quiet=False):
    """Check that GRID_X * GRID_Y * GRID_Z equals MAX_UNITS for each allocation.

    Args:
        df_alloc: Allocations with the three GRID_* columns and 'MAX_UNITS'.
        quiet: When True, no progress bar is shown.

    Returns:
        DataFrame with one 'Grid Math Mismatch' row for every allocation
        whose grid product differs from MAX_UNITS by more than 0.1.
    """
    rows = df_alloc.iterrows()
    if not quiet:
        rows = tqdm(rows, total=df_alloc.shape[0])

    findings = []
    for _, rec in rows:
        grid_cap = rec['GRID_X'] * rec['GRID_Y'] * rec['GRID_Z']
        if abs(grid_cap - rec['MAX_UNITS']) <= 0.1:
            continue
        findings.append({
            'ROW_ID': rec['ROW_ID'],
            'loc_inst_code': rec['loc_inst_code'],
            'Issue': 'Grid Math Mismatch',
            'Details': f"Grid ({grid_cap}) != MaxUnits ({rec['MAX_UNITS']})"
        })

    return pd.DataFrame(findings)

# --- CHECK: Rigid Body ---
def check_rigid_body(df_alloc, df_parts, quiet=False):
    """Verify that an allocation's oriented dimensions are a rotation of the part.

    The three ORIENT_*_MM values and the part's LEN/WID/DEP values are each
    sorted and compared pairwise, so the axis order does not matter. Any pair
    differing by more than 0.5 is reported. Allocations whose ITEM_ID has no
    matching part row are skipped.

    Args:
        df_alloc: Allocations, joined to parts on 'ITEM_ID'.
        df_parts: Parts providing 'LEN_MM', 'WID_MM' and 'DEP_MM'.
        quiet: When True, no progress bar is shown.

    Returns:
        DataFrame with one 'Dimensions Morphing' row per offending allocation.
    """
    joined = df_alloc.merge(df_parts, on='ITEM_ID', how='left', suffixes=('', '_PART'))

    rows = joined.iterrows()
    if not quiet:
        rows = tqdm(rows, total=joined.shape[0])

    findings = []
    for _, rec in rows:
        if pd.isna(rec['LEN_MM']):
            continue

        alloc_dims = sorted([rec['ORIENT_X_MM'], rec['ORIENT_Y_MM'], rec['ORIENT_Z_MM']])
        part_dims = sorted([rec['LEN_MM'], rec['WID_MM'], rec['DEP_MM']])

        deformed = False
        for stored, nominal in zip(alloc_dims, part_dims):
            if abs(stored - nominal) > 0.5:
                deformed = True
                break

        if deformed:
            findings.append({
                'ROW_ID': rec['ROW_ID'],
                'loc_inst_code': rec['loc_inst_code'],
                'Issue': 'Dimensions Morphing',
                'Details': f"Alloc {alloc_dims} vs Part {part_dims}"
            })

    return pd.DataFrame(findings)

# --- CHECK: Bin Overlap ---
def check_bin_overlaps(df_loc, quiet=False):
    """Find pairs of locations whose boxes intersect in space.

    Each location is treated as an axis-aligned box starting at (x, y, z)
    with extents (width, depth, height). Boxes are sorted by ``x`` and swept:
    for a given box, later boxes are examined only until one starts at or
    beyond its far X edge (less 0.1), then Y and Z ranges are tested for
    strict overlap.

    Args:
        df_loc: Locations with 'loc_inst_code', position and size columns.
            The frame is copied, not modified.
        quiet: When True, no progress bar is shown.

    Returns:
        DataFrame with one 'Physical Overlap' row per overlapping pair.
    """
    boxes = df_loc.copy()
    for origin, extent, far_edge in (('x', 'width', 'x2'), ('y', 'depth', 'y2'), ('z', 'height', 'z2')):
        boxes[far_edge] = boxes[origin] + boxes[extent]
    boxes = boxes.sort_values('x')

    records = boxes.to_dict('records')
    count = len(records)
    if count == 0:
        return pd.DataFrame()

    def _yz_intersect(a, b):
        # Strict overlap on both remaining axes (touching faces do not count).
        return (a['y'] < b['y2']) and (a['y2'] > b['y']) and (a['z'] < b['z2']) and (a['z2'] > b['z'])

    positions = range(count)
    if not quiet:
        positions = tqdm(positions, total=count)

    findings = []
    for i in positions:
        first = records[i]
        x_cutoff = first['x2'] - 0.1
        for j in range(i + 1, count):
            second = records[j]
            # Sorted by x: once a box starts past the cutoff, no later one can overlap.
            if second['x'] >= x_cutoff:
                break
            if _yz_intersect(first, second):
                findings.append({'LOC_A': first['loc_inst_code'], 'LOC_B': second['loc_inst_code'], 'Issue': 'Physical Overlap', 'Details': f"A({first['x']}) vs B({second['x']})"})

    return pd.DataFrame(findings)

# --- CHECK: Unallocated Items ---
def check_unallocated_feasibility(df_parts, df_alloc, df_loc):
    """Report items with no allocation and whether some could fit an empty bin.

    Items present in *df_parts* but absent from *df_alloc* are considered
    unallocated. Up to 20 of them (the first ones in parts-file order, so the
    sample is the same on every run) are tested against the locations that
    hold no allocation. The test is a coarse necessary condition only: the
    bin volume must be at least the part volume and the bin's largest
    dimension at least the part's largest dimension.

    Args:
        df_parts: Parts with 'ITEM_ID', 'LEN_MM', 'WID_MM' and 'DEP_MM'.
        df_alloc: Allocations with 'ITEM_ID' and 'loc_inst_code'.
        df_loc: Locations with 'loc_inst_code', 'width', 'depth', 'height'.

    Returns:
        None. The outcome is logged as PASS, WARN or FAIL.
    """
    log("--- Check: Unallocated Feasibility ---")

    allocated_ids = set(df_alloc['ITEM_ID'])
    # One row per unallocated item, in parts-file order. De-duplicating keeps
    # the counts per item even if a part is listed more than once.
    pending = df_parts[~df_parts['ITEM_ID'].isin(allocated_ids)].drop_duplicates(subset='ITEM_ID')
    unallocated_count = len(pending)

    if unallocated_count == 0:
        log("PASS: All items are allocated.")
        return
    log(f"INFO: {unallocated_count} items are Unallocated. Checking if they fit in empty bins...")

    occupied_locs = set(df_alloc['loc_inst_code'])
    empty_locs = df_loc[~df_loc['loc_inst_code'].isin(occupied_locs)].copy()

    if empty_locs.empty:
        log("WARN: Unallocated items exist, but NO empty bins are available.")
        return
    empty_locs['vol'] = empty_locs['width'] * empty_locs['depth'] * empty_locs['height']
    empty_locs['max_dim'] = empty_locs[['width', 'depth', 'height']].max(axis=1)

    sample_size = min(20, unallocated_count)
    sample_items = pending.head(sample_size)

    fits_found = 0
    for _, part in sample_items.iterrows():
        p_vol = part['LEN_MM'] * part['WID_MM'] * part['DEP_MM']
        p_max = max(part['LEN_MM'], part['WID_MM'], part['DEP_MM'])
        candidate_bins = (empty_locs['vol'] >= p_vol) & (empty_locs['max_dim'] >= p_max)
        if candidate_bins.any():
            fits_found += 1

    if fits_found > 0:
        log(f"FAIL: {unallocated_count} Items Unallocated. Sample check ({sample_size} items) shows {fits_found} COULD fit in currently empty bins.")
    else:
        log("WARN: Unallocated items exist, but appear too large for available empty bins.")

# ==========================================
# 5. VISUALIZATION
# ==========================================

def plot_single_bin(row, title_prefix, filename_tag):
    """Draw one bin with its item grid and save the figure as a PNG.

    The figure has a front view (X-Z), a top view (X-Y) and a text panel
    summarising the allocation. It is saved to
    ``OUTPUT_DIR/<filename_tag>.png`` and the path is logged.

    Args:
        row: Merged allocation/location record. Reads the bin dimensions,
            ORIENT_*_MM, GRID_*, ROW_ID, loc_inst_code, ITEM_ID,
            UTILIZATION_PCT, MAX_UNITS, LOC_VOL and STACK_VOL.
        title_prefix: Heading shown at the top of the text panel.
        filename_tag: File name (without extension) for the saved image.
    """
    fig = plt.figure(figsize=(18, 10))
    layout = fig.add_gridspec(2, 2, width_ratios=[2, 1])
    ax_front = fig.add_subplot(layout[0, 0])
    ax_top = fig.add_subplot(layout[1, 0])
    ax_text = fig.add_subplot(layout[:, 1])
    ax_text.axis('off')

    # Bin extents, and item extents mapped to the same axes (X=width, Z=height, Y=depth).
    bin_w, bin_h, bin_d = row['width'], row['height'], row['depth']
    item_w, item_h, item_d = row['ORIENT_X_MM'], row['ORIENT_Z_MM'], row['ORIENT_Y_MM']
    gx, gy, gz = int(row['GRID_X']), int(row['GRID_Y']), int(row['GRID_Z'])

    def _finish_axes(ax, title, span_x, span_y):
        # Shared cosmetics: title, 50-unit margin around the bin, equal scale, dotted grid.
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.set_xlim(-50, span_x + 50)
        ax.set_ylim(-50, span_y + 50)
        ax.set_aspect('equal')
        ax.grid(True, linestyle=':', alpha=0.5)

    # Front view: bin outline, then one rectangle per (layer, column).
    ax_front.add_patch(patches.Rectangle((0, 0), bin_w, bin_h, fill=False, edgecolor='red', lw=3, label='Bin'))
    for layer in range(gz):
        for col in range(gx):
            ax_front.add_patch(patches.Rectangle((col*item_w, layer*item_h), item_w, item_h, lw=1, ec='black', fc='skyblue', alpha=0.6))
    _finish_axes(ax_front, "FRONT VIEW (X-Z)", bin_w, bin_h)

    # Top view: bin outline, then one rectangle per (row, column).
    ax_top.add_patch(patches.Rectangle((0, 0), bin_w, bin_d, fill=False, edgecolor='red', lw=3))
    for depth_row in range(gy):
        for col in range(gx):
            ax_top.add_patch(patches.Rectangle((col*item_w, depth_row*item_d), item_w, item_d, lw=1, ec='black', fc='orange', alpha=0.6))
    _finish_axes(ax_top, "TOP VIEW (X-Y)", bin_w, bin_d)

    info_text = (
        f"REPORT: {title_prefix}\n----------------------------------------\n"
        f"ALLOCATION ROW:   {row['ROW_ID']}\nLOCATION ID:      {row['loc_inst_code']}\nITEM ID:          {row['ITEM_ID']}\n"
        f"----------------------------------------\nMETRICS:\n"
        f"  Utilization:    {row['UTILIZATION_PCT']:.2f} %\n  Total Units:    {row['MAX_UNITS']}\n\n"
        f"DIMENSIONS (mm):\n  Bin (WxDxH):    {bin_w} x {bin_d} x {bin_h}\n"
        f"  Item Orient:    {row['ORIENT_X_MM']} x {row['ORIENT_Y_MM']} x {row['ORIENT_Z_MM']}\n"
        f"  Stack Total:    {gx*item_w:.1f} x {gy*item_d:.1f} x {gz*item_h:.1f}\n\n"
        f"GRID CONFIG:\n  Cols (X):       {gx}\n  Rows (Y):       {gy}\n  Layers (Z):     {gz}\n\n"
        f"VOLUMES (mm3):\n  Bin Volume:     {row['LOC_VOL']:,.0f}\n  Bulk Item Vol:  {row['STACK_VOL']:,.0f}\n"
    )
    panel_box = dict(boxstyle="round,pad=0.5", fc="white", ec="gray", alpha=0.9)
    ax_text.text(0.05, 0.95, info_text, transform=ax_text.transAxes, fontsize=12, verticalalignment='top', family='monospace', bbox=panel_box)

    plt.tight_layout()
    save_path = f"{OUTPUT_DIR}/{filename_tag}.png"
    plt.savefig(save_path)
    plt.close()
    log(f"Plot saved to {save_path}")

def visualize_utilization_extremes(datasets):
    """Plot the allocations with the highest and lowest stated utilization.

    Allocations are inner-joined to locations on 'loc_inst_code' and limited
    to bins with a positive volume. The row with the largest UTILIZATION_PCT
    is always plotted; the row with the smallest UTILIZATION_PCT above zero
    is plotted when one exists. Nothing is plotted when no row survives the
    join or the volume filter.

    Args:
        datasets: Mapping with 'ALLOCATIONS' and 'LOCATIONS' DataFrames.
    """
    log("--- Generating Utilization Plots ---")
    df_alloc = datasets['ALLOCATIONS']
    df_loc = datasets['LOCATIONS']

    merged = df_alloc.merge(df_loc, on='loc_inst_code', how='inner', suffixes=('', '_LOC'))
    if merged.empty:
        return

    # Volumes shown in the plot's text panel.
    unit_vol = merged['ORIENT_X_MM'] * merged['ORIENT_Y_MM'] * merged['ORIENT_Z_MM']
    merged['STACK_VOL'] = merged['QTY_ALLOCATED'] * unit_vol
    merged['LOC_VOL'] = merged['width'] * merged['depth'] * merged['height']
    merged = merged[merged['LOC_VOL'] > 0]

    # The filter above can remove every row; without this guard the
    # .iloc[0] below would raise IndexError.
    if merged.empty:
        log("WARN: No allocated locations with positive volume; skipping utilization plots.")
        return

    # Ranking uses the stated UTILIZATION_PCT column as loaded, not a recomputed value.
    top_row = merged.sort_values(by='UTILIZATION_PCT', ascending=False).iloc[0]
    plot_single_bin(top_row, "HIGHEST UTILIZATION", "visual_utilization_max")

    active = merged[merged['UTILIZATION_PCT'] > 0]
    if not active.empty:
        low_row = active.sort_values(by='UTILIZATION_PCT', ascending=True).iloc[0]
        plot_single_bin(low_row, "LOWEST UTILIZATION", "visual_utilization_min")

# ==========================================
# 6. RUNNER
# ==========================================

def _emit_check_result(res, fail_message, pass_message, csv_name):
    """Log PASS/FAIL for one check and save its issue rows to OUTPUT_DIR on failure."""
    if res.empty:
        log(pass_message)
        return
    log(fail_message.format(count=len(res)))
    res.to_csv(f"{OUTPUT_DIR}/{csv_name}", index=False)


def _write_report():
    """Write every buffered log line to validation_report.txt in OUTPUT_DIR."""
    with open(f"{OUTPUT_DIR}/validation_report.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(report_buffer))


def run_full_diagnostic():
    """Load the three datasets and run every validation check in sequence.

    The run stops after loading if any dataset fails schema validation.
    Otherwise the checks run in a fixed order, each logging PASS/FAIL and
    writing a ``fail_*.csv`` file for its findings, followed by the
    utilization plots. The text report is written last, and also when the
    run stops early or a check raises, so it always reflects everything
    logged up to that point.
    """
    datasets, valid_flags = load_all_data()

    try:
        if not all(valid_flags.values()):
            log("STOPPING: Schema Errors.")
            return

        allocations = datasets['ALLOCATIONS']
        locations = datasets['LOCATIONS']
        parts = datasets['PARTS']

        # 1. Volume Data Integrity
        log("--- Check: Volume Data Integrity ---")
        res = estimate_and_sample(allocations, "Volume Data", check_volume_data_integrity, locations)
        _emit_check_result(res, "FAIL: {count} Volume Data mismatches.",
                           "PASS: Stated volumes match location dimensions.", "fail_volume_data.csv")

        # 2. Utilization Data Integrity
        log("--- Check: Stated Utilization Accuracy ---")
        res = estimate_and_sample(allocations, "Utilization Data", check_stated_utilization, locations)
        _emit_check_result(res, "FAIL: {count} Utilization % mismatches.",
                           "PASS: Utilization data matches calculations.", "fail_utilization_data.csv")

        # 3. Inventory Balance
        check_inventory_balance(allocations, parts)

        # 4. Standard Checks
        log("--- Check: Single SKU per Bin ---")
        dupes = allocations[allocations.duplicated(subset=['loc_inst_code'], keep=False)]
        if not dupes.empty:
            # Report the number of distinct locations, not the number of duplicated rows.
            location_count = len(set(dupes['loc_inst_code']))
            log(f"FAIL: {location_count} locations have multiple SKUs.")
            dupes.to_csv(f"{OUTPUT_DIR}/fail_single_sku.csv", index=False)
        else:
            log("PASS: Single SKU constraint met.")

        log("--- Check: Grid Math ---")
        res = estimate_and_sample(allocations, "Grid Math", check_grid_consistency)
        _emit_check_result(res, "FAIL: {count} Grid Math errors.",
                           "PASS: Grid Math consistent.", "fail_grid_math.csv")

        log("--- Check: Rigid Body ---")
        res = estimate_and_sample(allocations, "Rigid Body", check_rigid_body, parts)
        _emit_check_result(res, "FAIL: {count} Rigid Body errors.",
                           "PASS: Rigid Body dimensions valid.", "fail_rigid_body.csv")

        log("--- Check: Stack vs Bin Dimensions ---")
        res = estimate_and_sample(allocations, "Stack Fit", check_stack_fit, locations)
        _emit_check_result(res, "FAIL: {count} allocations exceed bin dimensions.",
                           "PASS: All stacks fit within bins.", "fail_stack_fit.csv")

        log("--- Check: Bin Overlaps ---")
        res = estimate_and_sample(locations, "Bin Overlap", check_bin_overlaps)
        _emit_check_result(res, "FAIL: {count} locations overlap physically.",
                           "PASS: No bin overlaps.", "fail_bin_overlap.csv")

        check_unallocated_feasibility(parts, allocations, locations)
        visualize_utilization_extremes(datasets)

        log("Validation Complete.")
    finally:
        # Written last so the report includes the final log line, and written
        # even when the run stops early or a check raises.
        _write_report()

# Run the whole validation suite only when this file is executed directly, not when imported.
if __name__ == "__main__":
    run_full_diagnostic()

[2026-01-05 18:41:37] Output folder 'validation_results' ready.
[2026-01-05 18:41:37] [96m--- Loading LOCATIONS (locations_dummy.csv) ---[0m
[2026-01-05 18:41:37] SUCCESS: LOCATIONS loaded (357 rows).
[2026-01-05 18:41:37] [96m--- Loading ALLOCATIONS (allocations.csv) ---[0m
[2026-01-05 18:41:37] SUCCESS: ALLOCATIONS loaded (10 rows).
[2026-01-05 18:41:37] [96m--- Loading PARTS (synthetic_parts_generated.csv) ---[0m
[2026-01-05 18:41:37] SUCCESS: PARTS loaded (75 rows).
[2026-01-05 18:41:37] [96m--- Check: Volume Data Integrity ---[0m
[2026-01-05 18:41:37] Starting Volume Data...
[2026-01-05 18:41:37]    Est. time: 0.01s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 4843.87it/s]


[2026-01-05 18:41:37] [92m[1mPASS[0m: Stated volumes match location dimensions.
[2026-01-05 18:41:37] [96m--- Check: Stated Utilization Accuracy ---[0m
[2026-01-05 18:41:37] Starting Utilization Data...
[2026-01-05 18:41:37]    Est. time: 0.01s
[2026-01-05 18:41:37] INFO: Checking utilization accuracy using column: 'UTILIZATION_PCT'


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 3740.57it/s]


[2026-01-05 18:41:37] [92m[1mPASS[0m: Utilization data matches calculations.
[2026-01-05 18:41:37] [96m--- Check: Inventory Balance (Partial Unallocated) ---[0m
[2026-01-05 18:41:37] [91m[1mFAIL[0m: Found 66 Items with inventory mismatches.
[2026-01-05 18:41:37] [96m--- Check: Single SKU per Bin ---[0m
[2026-01-05 18:41:37] [92m[1mPASS[0m: Single SKU constraint met.
[2026-01-05 18:41:37] [96m--- Check: Grid Math ---[0m
[2026-01-05 18:41:37] Starting Grid Math...
[2026-01-05 18:41:37]    Est. time: 0.01s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 3665.07it/s]


[2026-01-05 18:41:37] [92m[1mPASS[0m: Grid Math consistent.
[2026-01-05 18:41:37] [96m--- Check: Rigid Body ---[0m
[2026-01-05 18:41:37] Starting Rigid Body...
[2026-01-05 18:41:37]    Est. time: 0.01s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 3460.93it/s]


[2026-01-05 18:41:37] [92m[1mPASS[0m: Rigid Body dimensions valid.
[2026-01-05 18:41:37] [96m--- Check: Stack vs Bin Dimensions ---[0m
[2026-01-05 18:41:37] Starting Stack Fit...
[2026-01-05 18:41:37]    Est. time: 0.01s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 3738.57it/s]


[2026-01-05 18:41:37] [92m[1mPASS[0m: All stacks fit within bins.
[2026-01-05 18:41:37] [96m--- Check: Bin Overlaps ---[0m
[2026-01-05 18:41:37] Starting Bin Overlap...
[2026-01-05 18:41:37]    Est. time: 0.02s


100%|█████████████████████████████████████████████████████████████████████████████| 357/357 [00:00<00:00, 70444.42it/s]

[2026-01-05 18:41:37] [92m[1mPASS[0m: No bin overlaps.
[2026-01-05 18:41:37] [96m--- Check: Unallocated Feasibility ---[0m
[2026-01-05 18:41:37] INFO: 66 items are Unallocated. Checking if they fit in empty bins...





[2026-01-05 18:41:37] [91m[1mFAIL[0m: 66 Items Unallocated. Sample check (20 items) shows 19 COULD fit in currently empty bins.
[2026-01-05 18:41:37] [96m--- Generating Utilization Plots ---[0m
[2026-01-05 18:41:38] Plot saved to validation_results/visual_utilization_max.png
[2026-01-05 18:41:39] Plot saved to validation_results/visual_utilization_min.png
[2026-01-05 18:41:39] Validation Complete.
