In [11]:
import pandas as pd
import numpy as np
import os
import shutil
import time
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm import tqdm
from datetime import datetime
import glob
import itertools

# ==========================================
# 1. CONFIGURATION & SCHEMA
# ==========================================

LOCATIONS_FILE = 'locations_dummy.csv'
ALLOCATIONS_FILE = 'allocations.csv'
PARTS_FILE = 'synthetic_parts_generated.csv'
OUTPUT_DIR = 'validation_results'
MAX_EXECUTION_TIME_SEC = 300

# Strict Schema
REQUIRED_SCHEMA = {
    'LOCATIONS': {
        'file': LOCATIONS_FILE,
        'columns': ['loc_inst_code', 'width', 'depth', 'height', 'x', 'y', 'z']
    },
    'ALLOCATIONS': {
        'file': ALLOCATIONS_FILE,
        'columns': ['LOCATION_ID', 'SKU', 
                    'GRID_X', 'GRID_Y', 'GRID_Z', 
                    'ORIENT_X_MM', 'ORIENT_Y_MM', 'ORIENT_Z_MM', 
                    'MAX_UNITS', 'CURRENT_STOCK'] 
    },
    'PARTS': {
        'file': PARTS_FILE,
        'columns': ['ITEM_ID', 'LEN_MM', 'WID_MM', 'DEP_MM', 'WT_KG']
    }
}

# ==========================================
# 2. LOGGING & UTILS
# ==========================================

class Colors:
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    RESET = '\033[0m'
    BOLD = '\033[1m'

report_buffer = []

def log(message):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    plain_msg = f"[{timestamp}] {message}"
    report_buffer.append(plain_msg)

    colored_msg = message
    if "PASS" in message:
        colored_msg = message.replace("PASS", f"{Colors.GREEN}{Colors.BOLD}PASS{Colors.RESET}")
    elif "FAIL" in message:
        colored_msg = message.replace("FAIL", f"{Colors.RED}{Colors.BOLD}FAIL{Colors.RESET}")
    elif "CRITICAL" in message:
        colored_msg = f"{Colors.RED}{Colors.BOLD}{message}{Colors.RESET}"
    elif "WARN" in message:
        colored_msg = message.replace("WARN", f"{Colors.YELLOW}{Colors.BOLD}WARN{Colors.RESET}")
    elif "---" in message:
        colored_msg = f"{Colors.CYAN}{message}{Colors.RESET}"
    
    print(f"[{timestamp}] {colored_msg}")

def setup_environment():
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR)
    log(f"Output folder '{OUTPUT_DIR}' ready.")

# ==========================================
# 3. DATA LOADING
# ==========================================

def load_and_validate_dataset(key, config):
    filepath = config['file']
    required_cols = config['columns']
    
    log(f"--- Loading {key} ---")
    if not os.path.exists(filepath):
        log(f"CRITICAL: File {filepath} not found.")
        return None, False

    try:
        df = pd.read_csv(filepath, sep=None, engine='python', dtype=str)
        df.columns = df.columns.str.strip().str.replace('^ï»¿', '', regex=True)
        
        missing_cols = [c for c in required_cols if c not in df.columns]
        if missing_cols:
            log(f"CRITICAL SCHEMA ERROR in {key}. Missing: {missing_cols}")
            return df, False
        
        log(f"SUCCESS: {key} loaded ({len(df)} rows).")
        return df, True
    except Exception as e:
        log(f"CRITICAL ERROR reading {filepath}: {e}")
        return None, False

def convert_numeric(df, cols):
    for col in cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    return df

def load_all_data():
    setup_environment()
    datasets = {}
    valid_flags = {}
    
    # Locations
    df, valid = load_and_validate_dataset('LOCATIONS', REQUIRED_SCHEMA['LOCATIONS'])
    if valid: df = convert_numeric(df, ['width', 'depth', 'height', 'x', 'y', 'z'])
    datasets['LOCATIONS'] = df
    valid_flags['LOCATIONS'] = valid

    # Allocations
    df, valid = load_and_validate_dataset('ALLOCATIONS', REQUIRED_SCHEMA['ALLOCATIONS'])
    if valid: 
        df = convert_numeric(df, ['GRID_X', 'GRID_Y', 'GRID_Z', 
                                  'ORIENT_X_MM', 'ORIENT_Y_MM', 'ORIENT_Z_MM', 
                                  'MAX_UNITS', 'CURRENT_STOCK'])
    datasets['ALLOCATIONS'] = df
    valid_flags['ALLOCATIONS'] = valid

    # Parts
    df, valid = load_and_validate_dataset('PARTS', REQUIRED_SCHEMA['PARTS'])
    if valid: df = convert_numeric(df, ['LEN_MM', 'WID_MM', 'DEP_MM', 'WT_KG'])
    datasets['PARTS'] = df
    valid_flags['PARTS'] = valid
    
    return datasets, valid_flags

# ==========================================
# 4. TRUST NOTHING CHECKS
# ==========================================

def estimate_and_sample(df, check_name, validation_func, *args):
    log(f"Starting {check_name}...")
    total_rows = len(df)
    test_size = min(1000, total_rows)
    if test_size == 0: return pd.DataFrame()

    start_time = time.time()
    try:
        _ = validation_func(df.head(test_size), *args, quiet=True)
    except Exception as e:
        log(f"ERROR in {check_name}: {e}")
        return pd.DataFrame()
        
    duration = time.time() - start_time
    if duration == 0: duration = 0.001
    est_time = (duration / test_size) * total_rows
    log(f"   Est. time: {est_time:.2f}s")

    if est_time > MAX_EXECUTION_TIME_SEC:
        safe_rows = int((MAX_EXECUTION_TIME_SEC / duration) * test_size)
        log(f"   WARN: Time limit exceeded. Sampling {safe_rows} rows.")
        df_to_process = df.sample(n=safe_rows, random_state=42)
    else:
        df_to_process = df

    return validation_func(df_to_process, *args, quiet=False)

def check_grid_consistency(df_alloc, quiet=False):
    issues = []
    iterator = tqdm(df_alloc.iterrows(), total=df_alloc.shape[0]) if not quiet else df_alloc.iterrows()

    for idx, row in iterator:
        grid_cap = row['GRID_X'] * row['GRID_Y'] * row['GRID_Z']
        max_units = row['MAX_UNITS']
        if abs(grid_cap - max_units) > 0.1:
            issues.append({
                'LOCATION_ID': row['LOCATION_ID'],
                'SKU': row['SKU'],
                'Issue': 'Grid Math Mismatch',
                'Details': f"Grid ({grid_cap}) != Max ({max_units})"
            })
    return pd.DataFrame(issues)

def check_rigid_body(df_alloc, df_parts, quiet=False):
    merged = df_alloc.merge(df_parts, left_on='SKU', right_on='ITEM_ID', how='left')
    issues = []
    iterator = tqdm(merged.iterrows(), total=merged.shape[0]) if not quiet else merged.iterrows()

    for idx, row in iterator:
        if pd.isna(row['LEN_MM']): continue
        orient_dims = sorted([row['ORIENT_X_MM'], row['ORIENT_Y_MM'], row['ORIENT_Z_MM']])
        part_dims = sorted([row['LEN_MM'], row['WID_MM'], row['DEP_MM']])
        
        match = True
        for o, p in zip(orient_dims, part_dims):
            if abs(o - p) > 0.5: match = False
        
        if not match:
            issues.append({
                'LOCATION_ID': row['LOCATION_ID'],
                'SKU': row['SKU'],
                'Issue': 'Dimensions Morphing',
                'Details': f"Alloc {orient_dims} vs Part {part_dims}"
            })
    return pd.DataFrame(issues)

def check_bin_overlaps(df_loc, quiet=False):
    df = df_loc.copy()
    df['x2'] = df['x'] + df['width']
    df['y2'] = df['y'] + df['depth']
    df['z2'] = df['z'] + df['height']
    df.sort_values('x', inplace=True)
    
    issues = []
    data = df.to_dict('records')
    n = len(data)
    if n == 0: return pd.DataFrame()

    iter_range = tqdm(range(n), total=n) if not quiet else range(n)

    for i in iter_range:
        a = data[i]
        for j in range(i + 1, n):
            b = data[j]
            if b['x'] >= a['x2'] - 0.1: break
            if (a['y'] < b['y2']) and (a['y2'] > b['y']) and (a['z'] < b['z2']) and (a['z2'] > b['z']):
                issues.append({
                    'LOC_A': a['loc_inst_code'], 'LOC_B': b['loc_inst_code'],
                    'Issue': 'Physical Overlap', 'Details': f"A({a['x']}) overlaps B({b['x']})"
                })
    return pd.DataFrame(issues)

def check_single_sku(df_alloc):
    dupes = df_alloc[df_alloc.duplicated(subset=['LOCATION_ID'], keep=False)]
    if len(dupes) > 0: return dupes
    return pd.DataFrame()

# ==========================================
# 5. VISUALIZATION
# ==========================================

def plot_single_bin(row, title_prefix, filename_tag):
    """Generic plotter for a specific allocation row."""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    fig.suptitle(f"{title_prefix}: {row['LOCATION_ID']} ({row['UTILIZATION_PCT']:.1f}%)", fontsize=16)

    bin_w, bin_h, bin_d = row['width'], row['height'], row['depth']
    item_w, item_h, item_d = row['ORIENT_X_MM'], row['ORIENT_Z_MM'], row['ORIENT_Y_MM']
    gx, gy, gz = int(row['GRID_X']), int(row['GRID_Y']), int(row['GRID_Z'])

    # FRONT VIEW
    ax1.add_patch(patches.Rectangle((0, 0), bin_w, bin_h, fill=False, edgecolor='red', lw=3))
    for z in range(gz):
        for x in range(gx):
            ax1.add_patch(patches.Rectangle((x*item_w, z*item_h), item_w, item_h, 
                                            lw=1, ec='black', fc='skyblue', alpha=0.6))
    ax1.set_title("Front View (X-Z)")
    ax1.set_xlim(-100, bin_w+100); ax1.set_ylim(-100, bin_h+100)
    ax1.set_aspect('equal')

    # TOP VIEW
    ax2.add_patch(patches.Rectangle((0, 0), bin_w, bin_d, fill=False, edgecolor='red', lw=3))
    for y in range(gy):
        for x in range(gx):
            ax2.add_patch(patches.Rectangle((x*item_w, y*item_d), item_w, item_d, 
                                            lw=1, ec='black', fc='orange', alpha=0.6))
    ax2.set_title("Top View (X-Y)")
    ax2.set_xlim(-100, bin_w+100); ax2.set_ylim(-100, bin_d+100)
    ax2.set_aspect('equal')
    
    save_path = f"{OUTPUT_DIR}/{filename_tag}.png"
    plt.savefig(save_path)
    plt.close()
    log(f"Plot saved to {save_path}")

def visualize_utilization_extremes(datasets):
    log("--- Generating Utilization Plots ---")
    df_alloc = datasets['ALLOCATIONS']
    df_loc = datasets['LOCATIONS']
    merged = df_alloc.merge(df_loc, left_on='LOCATION_ID', right_on='loc_inst_code', how='inner')
    
    if merged.empty: return

    # Calc Stats
    merged['STACK_VOL'] = (merged['GRID_X'] * merged['ORIENT_X_MM']) * \
                          (merged['GRID_Y'] * merged['ORIENT_Y_MM']) * \
                          (merged['GRID_Z'] * merged['ORIENT_Z_MM'])
    merged['LOC_VOL'] = merged['width'] * merged['depth'] * merged['height']
    merged = merged[merged['LOC_VOL'] > 0]
    merged['UTILIZATION_PCT'] = (merged['STACK_VOL'] / merged['LOC_VOL']) * 100
    
    # 1. Top Utilization
    top_row = merged.sort_values(by='UTILIZATION_PCT', ascending=False).iloc[0]
    plot_single_bin(top_row, "Top Utilization", "visual_utilization_max")

    # 2. Lowest Utilization (Non-Zero)
    # We want active allocations, so filter > 0
    active_allocs = merged[merged['UTILIZATION_PCT'] > 0]
    if not active_allocs.empty:
        low_row = active_allocs.sort_values(by='UTILIZATION_PCT', ascending=True).iloc[0]
        plot_single_bin(low_row, "Lowest Utilization", "visual_utilization_min")
    else:
        log("WARN: No active allocations found to plot lowest utilization.")

# ==========================================
# 6. RUNNER
# ==========================================

def run_full_diagnostic():
    datasets, valid_flags = load_all_data()
    
    if not all(valid_flags.values()):
        log("STOPPING: Schema Errors. Check log above.")
        return

    # 1. Single SKU
    log("--- Check: Single SKU per Bin ---")
    dupes = check_single_sku(datasets['ALLOCATIONS'])
    if not dupes.empty:
        log(f"FAIL: {len(dupes)} locations with multiple SKUs/duplicates.")
        dupes.to_csv(f"{OUTPUT_DIR}/fail_single_sku.csv")
    else:
        log("PASS: All bins contain single SKU.")

    # 2. Grid Math
    log("--- Check: Grid Math Consistency ---")
    grid_issues = estimate_and_sample(datasets['ALLOCATIONS'], "Grid Math", check_grid_consistency)
    if not grid_issues.empty:
        log(f"FAIL: {len(grid_issues)} allocations have grid mismatch.")
        grid_issues.to_csv(f"{OUTPUT_DIR}/fail_grid_math.csv", index=False)
    else:
        log("PASS: Grid math consistent.")

    # 3. Rigid Body
    log("--- Check: Rigid Body Physics ---")
    rigid_issues = estimate_and_sample(datasets['ALLOCATIONS'], "Rigid Body", check_rigid_body, datasets['PARTS'])
    if not rigid_issues.empty:
        log(f"FAIL: {len(rigid_issues)} allocations morph dimensions.")
        rigid_issues.to_csv(f"{OUTPUT_DIR}/fail_rigid_body.csv", index=False)
    else:
        log("PASS: Rigid body dimensions valid.")

    # 4. Bin Overlap
    log("--- Check: Global Bin Overlaps ---")
    overlap_issues = estimate_and_sample(datasets['LOCATIONS'], "Bin Overlap", check_bin_overlaps)
    if not overlap_issues.empty:
        log(f"FAIL: {len(overlap_issues)} pairs of locations overlap.")
        overlap_issues.to_csv(f"{OUTPUT_DIR}/fail_bin_overlap.csv", index=False)
    else:
        log("PASS: No bin overlaps detected.")

    # 5. Visualization
    visualize_utilization_extremes(datasets)

    with open(f"{OUTPUT_DIR}/validation_report.txt", "w") as f:
        f.write("\n".join(report_buffer))
    log("Validation Complete.")

if __name__ == "__main__":
    run_full_diagnostic()

[2025-12-28 14:59:40] Output folder 'validation_results' ready.
[2025-12-28 14:59:40] [96m--- Loading LOCATIONS ---[0m
[2025-12-28 14:59:40] SUCCESS: LOCATIONS loaded (357 rows).
[2025-12-28 14:59:40] [96m--- Loading ALLOCATIONS ---[0m
[2025-12-28 14:59:40] SUCCESS: ALLOCATIONS loaded (10 rows).
[2025-12-28 14:59:40] [96m--- Loading PARTS ---[0m
[2025-12-28 14:59:40] SUCCESS: PARTS loaded (75 rows).
[2025-12-28 14:59:40] [96m--- Check: Single SKU per Bin ---[0m
[2025-12-28 14:59:40] [92m[1mPASS[0m: All bins contain single SKU.
[2025-12-28 14:59:40] [96m--- Check: Grid Math Consistency ---[0m
[2025-12-28 14:59:40] Starting Grid Math...
[2025-12-28 14:59:40]    Est. time: 0.00s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 6951.12it/s]


[2025-12-28 14:59:40] [92m[1mPASS[0m: Grid math consistent.
[2025-12-28 14:59:40] [96m--- Check: Rigid Body Physics ---[0m
[2025-12-28 14:59:40] Starting Rigid Body...
[2025-12-28 14:59:40]    Est. time: 0.00s


100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 2680.58it/s]


[2025-12-28 14:59:40] [92m[1mPASS[0m: Rigid body dimensions valid.
[2025-12-28 14:59:40] [96m--- Check: Global Bin Overlaps ---[0m
[2025-12-28 14:59:40] Starting Bin Overlap...
[2025-12-28 14:59:40]    Est. time: 0.02s


100%|█████████████████████████████████████████████████████████████████████████████| 357/357 [00:00<00:00, 53211.32it/s]

[2025-12-28 14:59:40] [92m[1mPASS[0m: No bin overlaps detected.
[2025-12-28 14:59:40] [96m--- Generating Utilization Plots ---[0m





[2025-12-28 14:59:40] Plot saved to validation_results/visual_utilization_max.png
[2025-12-28 14:59:41] Plot saved to validation_results/visual_utilization_min.png
[2025-12-28 14:59:41] Validation Complete.
