# 1. Import Data

## Import dependencies, set up environment

1. `python3 -m venv .venv`
2. Activate the virtual environment
    - (LINUX/MAC) `source .venv/bin/activate`
    - (WINDOWS) `.venv\Scripts\Activate.ps1`
3. `cd project_folder/util`
4. `pip install -r requirements.txt`

In [5]:
# Jupyter magic
%run ../util/dependencies.py

## FAA Service Discrepancy Reports

In [None]:
# Collect every exported SDR file. The listing is sorted so the row order of
# the composite frame does not depend on the order os.listdir returns.
filepath = '../data/01_raw/sdrs'
filenames = sorted(os.listdir(filepath))
filenames = [filepath + '/' + f for f in filenames]

if not filenames:
    raise FileNotFoundError(f'ERROR LOADING FILES: no SDR exports in {filepath}')

# Parse each file exactly once and keep the first table found in it.
frames = []
for f in filenames:
    tables = pd.read_html(f)
    if not tables:
        raise ValueError(f'ERROR LOADING FILES: no table found in {f}')
    frames.append(tables[0])

# A single concat avoids re-copying the accumulated frame for every file.
df = pd.concat(frames, ignore_index=True)

In [7]:
# Persist the combined SDR table (without the index column) for later cells.
df.to_csv(path_or_buf="../data/02_csv/SDR_COMPOSITE_EXPORT.csv", index=False)

## Parts Master Table

In [8]:
# -----------------------------
# 1. Load SDR composite CSV
# -----------------------------
sdr_path = "../data/02_csv/SDR_COMPOSITE_EXPORT.csv"
sdr = pd.read_csv(sdr_path)

# SDR columns used below: JASCCode, PartMake, PartName, PartNumber,
# ComponentMake, ComponentName, ComponentPartNumber

# ---------------------------------
# 2. Build unified "parts universe"
# ---------------------------------
# The Part* and Component* column groups are projected onto one shared
# schema (jasc_code, part_make, part_name, part_number) and stacked.

# A) From Part* fields
parts_a = sdr[["JASCCode", "PartMake", "PartName", "PartNumber"]].rename(
    columns={
        "JASCCode": "jasc_code",
        "PartMake": "part_make",
        "PartName": "part_name",
        "PartNumber": "part_number",
    }
)

# B) From Component* fields
parts_b = sdr[
    ["JASCCode", "ComponentMake", "ComponentName", "ComponentPartNumber"]
].rename(
    columns={
        "JASCCode": "jasc_code",
        "ComponentMake": "part_make",
        "ComponentName": "part_name",
        "ComponentPartNumber": "part_number",
    }
)

# C) Stack them
parts_all = pd.concat([parts_a, parts_b], ignore_index=True)

# Rows without a part name cannot be identified, so they are dropped.
parts_all = parts_all.dropna(subset=["part_name"])

# Normalize the text keys: upper-case, trimmed; a missing part number becomes
# "UNKNOWN" and a missing make becomes the empty string.
parts_all = parts_all.assign(
    part_number=parts_all["part_number"].fillna("UNKNOWN").astype(str).str.upper().str.strip(),
    part_name=parts_all["part_name"].astype(str).str.upper().str.strip(),
    part_make=parts_all["part_make"].fillna("").astype(str).str.upper().str.strip(),
)

# One row per distinct (part_number, part_name, jasc_code); first occurrence wins.
dedup_keys = ["part_number", "part_name", "jasc_code"]
parts_unique = parts_all.drop_duplicates(subset=dedup_keys)
parts_unique = parts_unique.reset_index(drop=True)

# ---------------------------------
# 3. Add realistic catalog attributes
# ---------------------------------

# A) OEM vs PMA flag
def is_pma(make):
    """Return 1 when *make* exactly equals one of the listed PMA makes, else 0."""
    pma_makes = ("HEICO", "WENCOR", "CHROMALLOY", "SPIRIT")
    return 1 if make in pma_makes else 0

# oem_flag is defined as the complement of pma_flag, so every row has exactly
# one of the two flags set.
parts_unique["pma_flag"] = parts_unique["part_make"].apply(is_pma)
parts_unique["oem_flag"] = 1 - parts_unique["pma_flag"]

# B) NSN (only for standard hardware)
def maybe_nsn(name):
    """Return a synthetic NSN-style string for standard hardware, else "".

    Multi-word terms are matched as plain substrings. Single-word terms must
    appear as whole words (regex word boundaries), so "PIN" does not match
    inside "PINION". The result is random digits grouped 4-2-3-4; it is not a
    real stock number.
    """
    text = name.upper()

    phrase_terms = ("TIE CLIP", "LOCKING CAM", "HINGE PIN", "CIRCUIT BREAKER")
    word_terms = (
        "BOLT", "NUT", "WASHER", "CLAMP", "FITTING", "SCREW", "LOCK",
        "BULB", "SEAL", "FUSE", "FASTENER", "PIN", "BRACKET",
    )

    is_hardware = any(phrase in text for phrase in phrase_terms)
    if not is_hardware:
        is_hardware = any(re.search(rf"\b{word}\b", text) for word in word_terms)
    if not is_hardware:
        return ""

    # Draw the four digit groups left to right; randint's upper bound is exclusive.
    groups = [
        np.random.randint(1000, 9999),
        np.random.randint(10, 99),
        np.random.randint(100, 999),
        np.random.randint(1000, 9999),
    ]
    return "-".join(str(group) for group in groups)

# Non-hardware parts receive an empty string rather than a missing value.
parts_unique["nsn"] = parts_unique["part_name"].apply(maybe_nsn)

# C) UOM
def uom_for_part(name):
    """Return the unit of measure ("GAL", "FT", "OZ" or "EA") for a part name.

    Rules are tried in order and the first rule with any term contained in the
    upper-cased name wins; names matching no rule default to "EA".
    """
    text = name.upper()

    # NOTE(review): terms are matched as plain substrings, so e.g. "OIL" also
    # matches "COIL" - confirm this is acceptable for the synthetic data.
    rules = (
        # Fluids
        (("FLUID", "OIL", "HYDRAULIC", "FUEL"), "GAL"),
        # Wire, tubing, hoses
        (("WIRE", "CABLE", "TUBE", "HOSE"), "FT"),
        # Hardware (bolts, nuts, washers, clamps, etc.)
        (("BOLT", "NUT", "WASHER", "CLAMP", "SCREW", "PIN", "BRACKET",
          "FASTENER", "HINGE", "FITTING"), "EA"),
        # Electrical components
        (("BULB", "FUSE", "CIRCUIT BREAKER", "LIGHT"), "EA"),
        # Sealants, adhesives
        (("SEALANT", "ADHESIVE", "EPOXY"), "OZ"),
    )
    for terms, unit in rules:
        for term in terms:
            if term in text:
                return unit

    # Default
    return "EA"

# Unit of measure is derived from the part name alone.
parts_unique["uom"] = parts_unique["part_name"].apply(uom_for_part)

# D) Weight (synthetic)
def realistic_weight(name):
    """Return a random synthetic weight in pounds chosen by part-name keywords.

    Categories are tested in order (tiny hardware, small electrical,
    hydraulics/pneumatics, avionics LRUs, structures) and the first match
    decides the uniform range that is sampled; unmatched names use the
    catch-all 0.5-20.0 range.
    """
    name = name.upper()

    # "LIGHT" is also a substring of "FLIGHT"; test it on a copy with "FLIGHT"
    # removed so e.g. "FLIGHT COMPUTER" is not weighed as a small lamp.
    name_without_flight = name.replace("FLIGHT", "")

    # tiny hardware
    if any(t in name for t in ["BOLT", "NUT", "WASHER", "SCREW", "PIN"]):
        return np.round(np.random.uniform(0.01, 0.5), 3)

    # small electrical components
    if (any(t in name for t in ["SENSOR", "SWITCH", "RELAY", "FUSE"])
            or "LIGHT" in name_without_flight):
        return np.round(np.random.uniform(0.2, 2.0), 2)

    # hydraulics & pneumatics
    if any(t in name for t in ["PUMP", "VALVE", "ACTUATOR", "REGULATOR"]):
        return np.round(np.random.uniform(2.0, 25.0), 2)

    # avionics LRUs
    if any(t in name for t in ["COMPUTER", "MODULE", "CONTROLLER", "BATTERY"]):
        return np.round(np.random.uniform(5.0, 40.0), 2)

    # structures
    if any(t in name for t in ["FAIRING", "PANEL", "BRACKET"]):
        return np.round(np.random.uniform(10.0, 80.0), 2)

    # default catch-all
    return np.round(np.random.uniform(0.5, 20.0), 2)

# Each row gets an independent random draw, so re-running changes the values.
parts_unique["weight_lbs"] = parts_unique["part_name"].apply(realistic_weight)

# E) Life limits (LLPs only)
def life_limit_cycles(name):
    """Return a random synthetic cycle limit for life-limited parts, else None.

    The first keyword group found in the upper-cased name selects the
    ``np.random.randint`` range (upper bound exclusive).
    """
    text = name.upper()
    cycle_ranges = (
        (("FAN DISK", "COMPRESSOR DISK", "TURBINE DISK"), (20000, 60000)),
        (("SHAFT", "ROTOR", "SPOOL"), (15000, 40000)),
        (("BLADE", "HPT BLADE", "LPT BLADE"), (10000, 25000)),
        (("BEARING",), (5000, 15000)),
    )
    for terms, (low, high) in cycle_ranges:
        if any(term in text for term in terms):
            return np.random.randint(low, high)
    return None


def life_limit_hours(name):
    """Return a random synthetic hour limit for life-limited parts, else None.

    The first keyword group found in the upper-cased name selects the
    ``np.random.randint`` range (upper bound exclusive).
    """
    text = name.upper()
    hour_ranges = (
        (("FAN DISK", "COMPRESSOR DISK", "TURBINE DISK"), (10000, 30000)),
        (("SHAFT", "ROTOR", "SPOOL"), (8000, 20000)),
        (("BLADE", "HPT BLADE", "LPT BLADE"), (5000, 15000)),
        (("BEARING",), (3000, 8000)),
    )
    for terms, (low, high) in hour_ranges:
        if any(term in text for term in terms):
            return np.random.randint(low, high)
    return None

# Cycle and hour limits are drawn independently; parts that are not
# life-limited get None from both helpers.
parts_unique["life_limit_cycles"] = parts_unique["part_name"].apply(life_limit_cycles)
parts_unique["life_limit_hours"]  = parts_unique["part_name"].apply(life_limit_hours)

# F) Repairable vs expendable
def repairable(name):
    """Return 1 when the name contains a rotable keyword, otherwise 0.

    Anything that is not clearly rotable is treated as expendable. That
    covers explicitly expendable items (bolts, seals, filters, structure, ...)
    as well as names that match nothing, so only the rotable list decides the
    result.
    """
    text = name.upper()
    rotable_terms = (
        "PUMP", "ACTUATOR", "VALVE", "BATTERY", "SLIDE", "GENERATOR",
        "STARTER", "MOTOR", "CONTROLLER", "COMPUTER", "MODULE",
        "SENSOR", "REGULATOR", "PACK",
    )
    for term in rotable_terms:
        if term in text:
            return 1
    return 0

# expendable_flag is the complement of repairable_flag.
parts_unique["repairable_flag"] = parts_unique["part_name"].apply(repairable)
parts_unique["expendable_flag"] = 1 - parts_unique["repairable_flag"]

# G) Alternate part numbers (PMA alternates)
def alt_pn(row):
    """Return the part number plus a random alternate suffix for PMA rows, else ""."""
    if row["pma_flag"] != 1:
        return ""
    part_number = row["part_number"]
    chosen_suffix = np.random.choice(["-PMA", "-ALT", "-STC", "-R1"])
    return part_number + chosen_suffix

# Row-wise because alt_pn needs both pma_flag and part_number.
parts_unique["alternate_part_numbers"] = parts_unique.apply(alt_pn, axis=1)

# ---------------------------------
# 4. Cost, lead time, criticality
# ---------------------------------

def lead_time_for_jasc(jasc):
    """Return a random synthetic lead time in days for a JASC code.

    The first two characters of ``str(jasc)`` select the range. A prefix that
    is not an integer (for example a missing value) falls back to 10-44 days.
    Upper bounds are exclusive, as with ``np.random.randint``.
    """
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        # Only conversion failures are expected here; a bare except would
        # also swallow KeyboardInterrupt and real programming errors.
        return np.random.randint(10, 45)
    if j in [25, 26, 27]:
        return np.random.randint(20, 45)
    if j in [32, 33, 34]:
        return np.random.randint(45, 90)
    if j in [52, 53, 54]:
        return np.random.randint(30, 75)
    return np.random.randint(10, 60)

def cost_for_part(row):
    """Return a random synthetic unit cost chosen by keywords in ``part_name``.

    Checks run in order: slides, batteries, lights, then a generic range.
    Upper bounds are exclusive, as with ``np.random.randint``.
    """
    # Upper-case defensively so matching does not depend on prior normalization.
    name = str(row["part_name"]).upper()
    if "SLIDE" in name:
        return np.random.randint(15000, 45000)
    if "BATTERY" in name:
        return np.random.randint(500, 5000)
    # "LIGHT" is also a substring of "FLIGHT"; drop "FLIGHT" before testing so
    # flight-system parts are not priced as lamps.
    if "LIGHT" in name.replace("FLIGHT", ""):
        return np.random.randint(100, 1500)
    return np.random.randint(1000, 50000)

def criticality(row):
    """Return the criticality label for a row from its ``jasc_code``.

    The first two characters of ``str(jasc_code)`` are read as an integer:
    27/32/33/34 give "AOG-critical", 25/26/52/53 give "High", any other
    number gives "Medium", and a non-numeric prefix gives "Low".
    """
    jasc = row["jasc_code"]
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        # Only conversion failures are expected; do not mask other errors.
        return "Low"
    if j in [27, 32, 33, 34]:
        return "AOG-critical"
    if j in [25, 26, 52, 53]:
        return "High"
    return "Medium"

# unit_cost and lead_time_days consume random numbers, so the order of these
# statements affects the generated values for a given RNG state.
parts_unique["unit_cost"] = parts_unique.apply(cost_for_part, axis=1)
parts_unique["lead_time_days"] = parts_unique["jasc_code"].apply(lead_time_for_jasc)
parts_unique["criticality_level"] = parts_unique.apply(criticality, axis=1)

# ---------------------------------
# 5. Final parts_master
# ---------------------------------

# Column order of the exported parts master table.
parts_master_columns = [
    "part_number", "part_name", "part_make", "jasc_code",
    "alternate_part_numbers", "nsn", "uom", "weight_lbs",
    "unit_cost", "lead_time_days", "criticality_level",
    "repairable_flag", "expendable_flag",
    "life_limit_cycles", "life_limit_hours",
    "oem_flag", "pma_flag",
]
parts_master = parts_unique[parts_master_columns].copy()

# -----------------------------
# 6. Save output
# -----------------------------
parts_master.to_csv(path_or_buf="../data/02_csv/parts_master.csv", index=False)

print("Generated: parts_master.csv")

Generated: parts_master.csv


In [9]:
# -----------------------------
# 1. Load Parts Master Datasets
# -----------------------------
sdr = pd.read_csv("../data/02_csv/SDR_COMPOSITE_EXPORT.csv")
parts_master = pd.read_csv("../data/02_csv/parts_master.csv")

# -----------------------------
# 3. Create synthetic suppliers
# -----------------------------
num_suppliers = 8
supplier_numbers = range(1, num_suppliers + 1)

supplier_ids = [f"S{number}" for number in supplier_numbers]
supplier_names = [f"Supplier_{number}" for number in supplier_numbers]

# The three random columns are drawn in this fixed order.
on_time_rates = np.round(np.random.uniform(0.85, 0.99, num_suppliers), 3)
delay_days = np.random.randint(1, 21, num_suppliers)
quality_scores = np.round(np.random.uniform(3.0, 5.0, num_suppliers), 2)

suppliers = pd.DataFrame({
    "supplier_id": supplier_ids,
    "supplier_name": supplier_names,
    "on_time_delivery_rate": on_time_rates,
    "avg_delay_days": delay_days,
    "quality_rating": quality_scores,
})

# Assign suppliers based (loosely) on JASC band so it's not purely random
def supplier_for_jasc(jasc):
    """Pick a random supplier_id, narrowing the candidates by JASC band.

    Bands 21-24 draw from the first three suppliers, bands 25-29 from
    suppliers at positions 2-5, and everything else (including codes whose
    two-character prefix is not an integer) draws from all suppliers. Reads
    the module-level ``suppliers`` frame.
    """
    all_ids = suppliers["supplier_id"]
    try:
        band = int(str(jasc)[:2])
    except (ValueError, TypeError):
        band = None

    if band in (21, 22, 23, 24):
        candidates = all_ids[:3]
    elif band in (25, 26, 27, 28, 29):
        candidates = all_ids[2:6]
    else:
        candidates = all_ids
    return np.random.choice(candidates)

# parts_unique is not created in this cell; it must already exist in the
# session from the parts-master cell.
parts_unique["supplier_id"] = parts_unique["jasc_code"].apply(supplier_for_jasc)

# -----------------------------
# 4. Lead times, unit costs, criticality
# -----------------------------
def lead_time_for_jasc(jasc):
    """Return a random synthetic lead time in days for a JASC code.

    The two-character prefix of ``str(jasc)`` selects the range; a prefix
    that is not an integer falls back to 10-44 days. Upper bounds are
    exclusive, as with ``np.random.randint``.
    """
    try:
        band = int(str(jasc)[:2])
    except (ValueError, TypeError):
        return np.random.randint(10, 45)

    day_ranges = (
        ((21, 22, 23), (7, 14)),     # air conditioning, auto flight, communications
        ((25, 26, 27), (20, 45)),    # equipment/furnishings, fire, flight controls
        ((32, 33, 34), (45, 90)),    # landing gear, lights, navigation
        ((52, 53, 54), (30, 75)),    # doors, fuselage, nacelles
    )
    for bands, (low, high) in day_ranges:
        if band in bands:
            return np.random.randint(low, high)
    return np.random.randint(10, 60)

def cost_for_row(row):
    """Return a random synthetic unit cost for a parts row.

    Part-name keywords are tried first (slide, floor beam, battery, power
    supply, lamp/light). If none match, the range is chosen from the
    two-digit prefix of ``jasc_code``. Upper bounds are exclusive, as with
    ``np.random.randint``.
    """
    jasc = row["jasc_code"]
    name = str(row["part_name"]).upper()
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        j = None

    # Heuristic by type
    if "SLIDE" in name:
        return np.random.randint(15000, 45000)
    if "FLOORBEAM" in name or "FLOOR BEAM" in name:
        return np.random.randint(5000, 25000)
    if "BATTERY" in name:
        return np.random.randint(500, 5000)
    if "POWER SUPPLY" in name:
        return np.random.randint(2000, 10000)
    # "LIGHT" is also a substring of "FLIGHT"; drop "FLIGHT" before testing so
    # flight-system parts are not priced as lamps.
    if "LAMP" in name or "LIGHT" in name.replace("FLIGHT", ""):
        return np.random.randint(100, 1500)

    # Fallback by JASC band
    if j in [21, 22, 23]:
        return np.random.randint(500, 8000)
    elif j in [25, 26, 27]:
        return np.random.randint(2000, 20000)
    elif j in [32, 33, 34]:
        return np.random.randint(10000, 100000)
    elif j in [52, 53, 54]:
        return np.random.randint(5000, 50000)
    else:
        return np.random.randint(1000, 15000)

def criticality_for_row(row):
    """Return a criticality label from a row's JASC band and condition text.

    The optional "condition" value sets a base level ("High" for
    crack/inoperative/fault/failed, "Medium" for corrosion, else "Low").
    JASC bands 27/32/33/34 always return "AOG-critical"; bands 25/26/52/53
    return "High" unless the base is "Low", in which case "Medium". Other
    bands return the base level.

    NOTE(review): the frame this is applied to in this notebook does not
    appear to carry a "condition" column, in which case the base is always
    "Low" - confirm whether that is intended.
    """
    jasc = row["jasc_code"]
    cond = str(row.get("condition", "")).upper()
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        j = None

    # Condition-based bump
    if any(k in cond for k in ["CRACK", "INOPERATIVE", "FAULT", "FAILED"]):
        base = "High"
    elif "CORROD" in cond:
        base = "Medium"
    else:
        base = "Low"

    # ATA-based override for safety-critical systems
    if j in [27, 32, 33, 34]:
        return "AOG-critical"
    if j in [25, 26, 52, 53]:
        return "High" if base != "Low" else "Medium"

    return base

# These assignments overwrite the lead_time_days, unit_cost and
# criticality_level columns on parts_unique (which must already exist in the
# session). lead time and cost consume random numbers, so statement order
# affects the generated values for a given RNG state.
parts_unique["lead_time_days"] = parts_unique["jasc_code"].apply(lead_time_for_jasc)
parts_unique["unit_cost"] = parts_unique.apply(cost_for_row, axis=1)
parts_unique["criticality_level"] = parts_unique.apply(criticality_for_row, axis=1)

# Rebinds parts_master to a slim in-memory view; this replaces the frame read
# from parts_master.csv at the top of this cell.
parts_master = parts_unique[
    [
        "part_number",
        "part_name",
        "part_make",
        "jasc_code",
        "unit_cost",
        "lead_time_days",
        "criticality_level",
        "supplier_id",
    ]
].copy()

# -----------------------------
# 5. Inventory table (failure-driven)
# -----------------------------
# Count failures by part_number + part_name across both Part* and Component* usage
# First, normalize SDR into a "usage" view
usage_a = sdr[["PartNumber", "PartName"]].copy()
usage_a.rename(columns={"PartNumber": "part_number", "PartName": "part_name"}, inplace=True)

usage_b = sdr[["ComponentPartNumber", "ComponentName"]].copy()
usage_b.rename(columns={"ComponentPartNumber": "part_number", "ComponentName": "part_name"}, inplace=True)

usage_all = pd.concat([usage_a, usage_b], ignore_index=True)
usage_all = usage_all.dropna(subset=["part_name"])

# The parts table keys are upper-cased, stripped strings with "UNKNOWN" for a
# missing part number. The usage keys must be normalized the same way, or the
# merge below silently misses rows that differ only in case, whitespace or
# dtype and reports a failure_count of 0 for them.
usage_all["part_number"] = usage_all["part_number"].fillna("UNKNOWN").astype(str).str.upper().str.strip()
usage_all["part_name"] = usage_all["part_name"].astype(str).str.upper().str.strip()

failure_counts = (
    usage_all
    .groupby(["part_number", "part_name"])
    .size()
    .reset_index(name="failure_count")
)

# Left join keeps every parts_master row; parts never reported get a count of 0.
inventory = parts_master.merge(
    failure_counts,
    on=["part_number", "part_name"],
    how="left"
)

inventory["failure_count"] = inventory["failure_count"].fillna(0)

def stock_from_row(row):
    """Return the synthetic stock-on-hand level for an inventory row.

    The level starts from failure_count (0, 1, 3 or 5 units), is raised to at
    least 3 for AOG-critical parts or lead times above 45 days, and is capped
    at 3 when the unit cost exceeds 50000.
    """
    failures = row["failure_count"]
    criticality_level = row["criticality_level"]
    unit_cost = row["unit_cost"]
    lead_time = row["lead_time_days"]

    # Base level from failure history
    if failures == 0:
        level = 0
    elif failures < 3:
        level = 1
    elif failures < 10:
        level = 3
    else:
        level = 5

    # Critical or slow-to-source parts keep a minimum buffer
    is_aog_critical = criticality_level == "AOG-critical"
    has_long_lead = lead_time > 45
    if is_aog_critical or has_long_lead:
        level = max(level, 3)

    # Very expensive parts are capped
    if unit_cost > 50000:
        level = min(level, 3)

    return level

# Stock level per row, then the reorder fields derived from it.
inventory["stock_on_hand"] = inventory.apply(stock_from_row, axis=1)
on_hand = inventory["stock_on_hand"]
inventory["reorder_point"] = on_hand.floordiv(2).clip(lower=0)
inventory["reorder_quantity"] = on_hand.mul(2).clip(lower=0)
inventory["backorder_flag"] = on_hand.eq(0)

# Column order of the exported inventory table.
inventory_columns = [
    "part_number", "part_name",
    "stock_on_hand", "reorder_point", "reorder_quantity", "backorder_flag",
    "failure_count", "criticality_level",
    "unit_cost", "lead_time_days", "supplier_id",
]
inventory = inventory[inventory_columns].copy()

# -----------------------------
# 6. Save outputs
# -----------------------------
suppliers.to_csv(path_or_buf="../data/02_csv/suppliers.csv", index=False)
inventory.to_csv(path_or_buf="../data/02_csv/inventory.csv", index=False)

print("Generated: suppliers.csv, inventory.csv")

Generated: suppliers.csv, inventory.csv


## Work Order Table

In [11]:
# -----------------------------
# 1. Load existing tables
# -----------------------------
sdr = pd.read_csv("../data/02_csv/SDR_COMPOSITE_EXPORT.csv")
parts_master = pd.read_csv("../data/02_csv/parts_master.csv")
inventory = pd.read_csv("../data/02_csv/inventory.csv")

# Normalize column names: trim stray whitespace around every header
for table in (sdr, parts_master, inventory):
    table.columns = [label.strip() for label in table.columns]

# -----------------------------
# 2. Safe column accessor
# -----------------------------
def safe_get_col(df, col):
    """Return column *col* of *df* as strings, with missing values as "".

    When the column does not exist, an all-empty-string Series is returned
    instead. It is built on ``df.index`` so that assigning it back to *df*
    aligns row-for-row even when the frame does not have a default
    RangeIndex.
    """
    if col in df.columns:
        return df[col].fillna("").astype(str)
    return pd.Series("", index=df.index, dtype="object")

# -----------------------------
# 3. Normalize SDR fields
# -----------------------------
# Join keys: upper-cased and trimmed. The parts and inventory tables store a
# missing part number as "UNKNOWN", so an empty SDR part number is mapped to
# the same placeholder; otherwise those SDRs can never match on part_number.
sdr["PartNumber"] = (
    safe_get_col(sdr, "PartNumber").str.upper().str.strip().replace("", "UNKNOWN")
)
sdr["PartName"]   = safe_get_col(sdr, "PartName").str.upper().str.strip()

# Narrative / code fields: only coerced to strings with "" for missing values.
sdr["Discrepancy"] = safe_get_col(sdr, "Discrepancy")
sdr["PartCondition"] = safe_get_col(sdr, "PartCondition")
sdr["StageOfOperationCode"] = safe_get_col(sdr, "StageOfOperationCode")
sdr["HowDiscoveredCode"] = safe_get_col(sdr, "HowDiscoveredCode")

# -----------------------------
# 4. Deduplicate parts_master & inventory before join
# -----------------------------
# Bring the join keys of both lookup tables to the same upper-cased,
# trimmed string form.
for lookup in (parts_master, inventory):
    for key in ("part_number", "part_name"):
        lookup[key] = lookup[key].astype(str).str.upper().str.strip()

# Keep the first row per key so each join below matches at most one row.
parts_master = parts_master.drop_duplicates(
    subset=["part_number", "part_name", "jasc_code"], keep="first"
)
parts_master = parts_master.reset_index(drop=True)

inventory = inventory.drop_duplicates(
    subset=["part_number", "part_name"], keep="first"
)
inventory = inventory.reset_index(drop=True)

# -----------------------------
# 5. Create SDR key (one row per SDR)
# -----------------------------
sdr_key = sdr[["OperatorControlNumber", "JASCCode", "PartNumber", "PartName"]].rename(
    columns={
        "OperatorControlNumber": "sdr_id",
        "JASCCode": "jasc_code",
        "PartNumber": "part_number",
        "PartName": "part_name",
    }
)

# one WO per SDR ID: the first row seen for each sdr_id is kept
sdr_key = sdr_key.drop_duplicates(subset=["sdr_id"], keep="first")
sdr_key = sdr_key.reset_index(drop=True)

# -----------------------------
# 6. Join SDR → parts_master (single match per SDR)
# -----------------------------
merged = pd.merge(
    sdr_key,
    parts_master,
    how="left",
    on=["part_number", "part_name", "jasc_code"],
    suffixes=("", "_pm"),
)

# -----------------------------
# 7. Join inventory (stocking data)
# -----------------------------
# Columns present on both sides keep their name from the left frame; the
# inventory copy gets the "_inv" suffix.
merged = pd.merge(
    merged,
    inventory,
    how="left",
    on=["part_number", "part_name"],
    suffixes=("", "_inv"),
)

# -----------------------------
# 8. Fill remaining missing values with defaults
# -----------------------------
# merged["criticality_level"] = merged["criticality_level"].fillna("Medium")
# Defaults for values that are still missing after the two joins.
fill_defaults = {
    "unit_cost": 5000,
    "lead_time_days": 30,
    "supplier_id": "S0",
    "stock_on_hand": 0,
    "failure_count": 0,
}
for column, default in fill_defaults.items():
    merged[column] = merged[column].fillna(default)

# bring in SDR context for codes/narratives (one row per OperatorControlNumber)
sdr_context = sdr[[
    "OperatorControlNumber",
    "Discrepancy",
    "PartCondition",
    "StageOfOperationCode",
    "HowDiscoveredCode",
]].drop_duplicates("OperatorControlNumber")

merged = pd.merge(
    merged,
    sdr_context,
    how="left",
    left_on="sdr_id",
    right_on="OperatorControlNumber",
)

# -----------------------------
# 9. Mapping helpers
# -----------------------------
def map_discovery_code(row):
    """Map SDR stage/how-discovered codes to a discovery code.

    Returns "V" or "IN" from HowDiscoveredCode when it is recognized,
    otherwise "IN" or "MX" from StageOfOperationCode, otherwise "OP".
    Comparison is case-insensitive.
    """
    # Coerce through str() so a missing value (e.g. NaN after a join) cannot
    # raise AttributeError on .upper(); it simply matches no code.
    stage = str(row.get("StageOfOperationCode", "")).upper()
    how = str(row.get("HowDiscoveredCode", "")).upper()
    if how in ["V", "VI"]:
        return "V"
    if how in ["IN", "IF"]:
        return "IN"
    if stage in ["LDG", "APP"]:
        return "IN"
    if stage in ["PREFLT", "TAXI"]:
        return "MX"
    return "OP"

def map_fault_code(row):
    """Derive a fault code from the PartCondition and Discrepancy text.

    Both fields are stringified, joined with a space and upper-cased. The
    rules are tried in order and the first rule with a keyword contained in
    that text wins; text matching no rule yields "OTHER".
    """
    text = "{} {}".format(
        str(row.get("PartCondition", "")), str(row.get("Discrepancy", ""))
    ).upper()

    # Longer variants such as "NO VOLTAGE" or "INOPER" are covered by their
    # shorter prefixes ("NO VOLT", "INOP"), so only the prefixes are listed.
    keyword_rules = (
        (("CORROD",), "CORRODED"),
        (("CRACK",), "CRACKED"),
        (("LEAK",), "LEAKING"),
        (("LOOSE",), "LOOSE"),
        (("NO VOLT", "NO PWR"), "NO VOLTAGE"),
        (("INOP", "FAILED", "FAULT"), "FAILED"),
        (("USER", "PILOT ERROR"), "USER ERROR"),
        (("BURN",), "BURNED"),
        (("NO DISCREPANCY", "NFF"), "NO FAULT FOUND"),
    )
    for keywords, code in keyword_rules:
        for keyword in keywords:
            if keyword in text:
                return code
    return "OTHER"

def map_action_and_task(row):
    """Return ``(action_code, task_type)`` for a work-order row.

    A recognized ``fault_code`` decides the action directly. Any other fault
    falls back to the two-digit prefix of ``jasc_code`` (25/33 replace,
    52/53/54 repair), and finally to troubleshooting.
    """
    fault = row["fault_code"]
    jasc = row["jasc_code"]
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        # Only conversion failures are expected; do not mask other errors.
        j = None

    if fault == "CORRODED":
        return "CORR", "Corrosion Removal"
    if fault == "CRACKED":
        return "RPR", "Repair"
    if fault == "LEAKING":
        return "RPR", "Repair"
    if fault == "LOOSE":
        return "ADJ", "Adjustment"
    if fault in ["NO VOLTAGE", "FAILED"]:
        return "R&R", "Remove & Replace"
    if fault == "NO FAULT FOUND":
        return "TS", "Troubleshooting"
    if fault == "USER ERROR":
        return "INS", "Inspection"

    # No fault-specific rule: decide by system (JASC band).
    if j in [25, 33]:
        return "R&R", "Remove & Replace"
    if j in [52, 53, 54]:
        return "RPR", "Repair"
    return "TS", "Troubleshooting"

def map_mel_code(row):
    """Return "D" for rows whose deferred_flag equals 1, otherwise "N"."""
    if row["deferred_flag"] == 1:
        return "D"
    return "N"

def _work_summary(action, part, fault):
    """Return the action-specific sentence for an already lower-cased part name."""
    if action == "R&R":
        return f"Removed and replaced {part}; performed operational test."
    if action == "RPR":
        return f"Repaired {part} for condition: {fault.lower()}; tested serviceable."
    if action == "CORR":
        return f"Removed corrosion on {part}; treated and protected area."
    if action == "ADJ":
        return f"Adjusted {part} to correct {fault.lower().replace('_', ' ')}."
    if action == "TS":
        return f"Troubleshot {part} for reported condition: {fault.lower()}."
    if action == "INS":
        return f"Inspected {part} for reported issue; no further action required."
    return f"Performed maintenance on {part} for condition: {fault.lower()}."


def generate_work_accomplished(row):
    """Compose the free-text "work accomplished" narrative for a work order.

    The sentence depends on ``action_code``. When the row carries a
    non-blank Discrepancy, its first 120 characters are appended.
    """
    part_text = str(row["part_name"]).title().lower()
    summary = _work_summary(row["action_code"], part_text, row["fault_code"])

    discrepancy = str(row.get("Discrepancy", "")).strip()
    if not discrepancy:
        return summary
    return summary + f" SDR discrepancy: {discrepancy[:120]}"

# -----------------------------
# 10. Timing / priority logic
# -----------------------------
def maintenance_type_for_jasc(jasc):
    """Return the maintenance type ("Line", "Base" or "Overnight") for a JASC code.

    Bands 25/33/52 are "Line" and 53/54 are "Base". Other numeric bands pick
    "Line" or "Overnight" at random. A code whose two-character prefix is not
    an integer is "Line".
    """
    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        # Only conversion failures are expected; do not mask other errors.
        return "Line"
    if j in [25, 33, 52]:
        return "Line"
    if j in [53, 54]:
        return "Base"
    return np.random.choice(["Line", "Overnight"])

def repair_duration(row):
    """Return the synthetic repair duration in hours for a work-order row.

    The base comes from keywords in PartCondition + Discrepancy (crack 12,
    corrosion 8, inoperative/fault/failed 4, leak 3, otherwise 2). Rows in
    JASC bands 53/54 get 10 extra hours.
    """
    cond_text = (str(row.get("PartCondition", "")) + " " +
                 str(row.get("Discrepancy", ""))).upper()
    jasc = row["jasc_code"]

    base = 2
    if "CRACK" in cond_text:
        base = 12
    elif "CORROD" in cond_text:
        base = 8
    elif "INOPER" in cond_text or "FAULT" in cond_text or "FAILED" in cond_text:
        base = 4
    elif "LEAK" in cond_text:
        base = 3

    try:
        j = int(str(jasc)[:2])
    except (ValueError, TypeError):
        # A JASC code without a numeric prefix simply gets no structural
        # surcharge; other exception types are not swallowed.
        j = None

    if j in [53, 54]:
        base += 10

    return base

def aog_flag(row):
    """Randomly flag a row as AOG: 70% for AOG-critical rows, 20% otherwise."""
    if row["criticality_level"] == "AOG-critical":
        outcomes, weights = [1, 0], [0.7, 0.3]
    else:
        outcomes, weights = [0, 1], [0.8, 0.2]
    return np.random.choice(outcomes, p=weights)

def backorder_delay(row):
    """Return the backorder delay in days: 0 when stock is on hand.

    Otherwise the delay is a normal draw centred on 40% of lead_time_days
    (standard deviation 2), truncated to an int and floored at 1 day.
    """
    if row["stock_on_hand"] > 0:
        return 0
    mean_delay = row["lead_time_days"] * 0.4
    sampled_days = int(np.random.normal(mean_delay, 2))
    return max(1, sampled_days)

def labor_hours(row):
    """Return labor hours: repair_duration_hours times a random 0.8-1.4 factor, rounded to 0.1."""
    effort_factor = np.random.uniform(0.8, 1.4)
    return round(row["repair_duration_hours"] * effort_factor, 1)

def deferred_flag(row):
    """Return 1 with 40% probability for "Low" criticality rows; all other rows get 0."""
    if row["criticality_level"] != "Low":
        return 0
    return np.random.choice([1, 0], p=[0.4, 0.6])

def priority(row):
    """Return the work-order priority ("High", "Medium" or "Low").

    AOG rows and rows with "High"/"AOG-critical" criticality are "High".
    "Medium" criticality maps to "Medium"; everything else is "Low".
    """
    if row["aog_flag"] == 1 or row["criticality_level"] in ("High", "AOG-critical"):
        return "High"
    return "Medium" if row["criticality_level"] == "Medium" else "Low"

# -----------------------------
# 11. Apply logic
# -----------------------------
# Statement order matters here: several helpers read columns created by an
# earlier line, and the random draws are consumed in this sequence.
merged["maintenance_type"] = merged["jasc_code"].apply(maintenance_type_for_jasc)
merged["repair_duration_hours"] = merged.apply(repair_duration, axis=1)
merged["aog_flag"] = merged.apply(aog_flag, axis=1)
# backorder_delay reads stock_on_hand and lead_time_days from the joins above.
merged["backorder_delay_days"] = merged.apply(backorder_delay, axis=1)
# labor_hours reads repair_duration_hours.
merged["labor_hours"] = merged.apply(labor_hours, axis=1)
# Turnaround is in hours, so the backorder delay in days is multiplied by 24.
merged["turnaround_time_hours"] = merged["repair_duration_hours"] + (merged["backorder_delay_days"] * 24)
merged["deferred_flag"] = merged.apply(deferred_flag, axis=1)
# priority reads aog_flag and criticality_level.
merged["priority"] = merged.apply(priority, axis=1)

merged["discovery_code"] = merged.apply(map_discovery_code, axis=1)
# fault_code must exist before map_action_and_task runs.
merged["fault_code"] = merged.apply(map_fault_code, axis=1)
# map_action_and_task returns a 2-tuple; pd.Series spreads it over two columns.
merged[["action_code", "task_type"]] = merged.apply(
    lambda r: pd.Series(map_action_and_task(r)),
    axis=1
)
# mel_code reads deferred_flag; work_accomplished reads action_code and fault_code.
merged["mel_code"] = merged.apply(map_mel_code, axis=1)
merged["work_accomplished"] = merged.apply(generate_work_accomplished, axis=1)

# -----------------------------
# 12. Add work-order ID
# -----------------------------
# IDs are "WO" plus the zero-padded row position, starting at WO000000.
merged["wo_id"] = ["WO" + str(i).zfill(6) for i in range(len(merged))]

# -----------------------------
# 13. Final selection
# -----------------------------
# Column order of the exported work-order table.
work_order_columns = [
    "wo_id", "sdr_id",
    "part_number", "part_name", "jasc_code",
    "maintenance_type", "discovery_code", "fault_code",
    "action_code", "task_type", "mel_code",
    "repair_duration_hours", "aog_flag", "backorder_delay_days",
    "labor_hours", "turnaround_time_hours",
    "deferred_flag", "priority", "work_accomplished",
]
work_orders = merged[work_order_columns]

# -----------------------------
# 14. Save
# -----------------------------
work_orders.to_csv(path_or_buf="../data/02_csv/work_orders.csv", index=False)
print("Generated: work_orders.csv")

Generated: work_orders.csv
