# In [None]:
import pandas as pd

# -------------------------------
# Config / Mappings
# -------------------------------
# Lookup table applied to the "reserved" column by normalize_reserved();
# keys are the raw labels, values are the yes/no flag written to the output.
RESERVED_MAPPING = {
    "not reserved": "no",
    "vanilla": "yes",
    None: "no",
}

# German -> English replacements. Keys are lower-case and are matched against
# whole cell values of the columns passed to apply_translations().
TRANSLATIONS = {
    # --- finish column ---
    "ungebeizt": "unpickled",
    "gebeizt": "pickled",
    "gebeizt und geglüht": "pickled and annealed",
    # --- description column (defects / notes) ---
    "längs- oder querisse": "longitudinal or transverse cracks",
    "kantenfehler - fs-kantenrisse": "edge defect - fs edge cracks",
    "sollmasse (gewicht) unterschritten": "target weight underachieved",
}

# Columns cleaned as text (lower-cased, stripped).
STRING_COLUMNS = [
    "quality",
    "finish",
    "description",
    "grade",
    "reserved",
]

# Columns coerced to float.
NUMERIC_COLUMNS = [
    "thickness_mm",
    "width_mm",
    "weight_kg",
    "quantity",
    "rp02",
    "rm",
    "ag",
    "ai",
]

# Final column layout of the exported dataset: identifying/text columns,
# then every numeric column in NUMERIC_COLUMNS order, then the reserved flag.
COLUMN_ORDER = [
    "article_id",
    "grade",
    "description",
    "finish",
    *NUMERIC_COLUMNS,
    "reserved",
]

# -------------------------------
# STEP 1: Helper functions
# -------------------------------
def normalize_strings(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Lower-case and strip whitespace in the given text columns.

    Columns listed in ``columns`` that are not present in ``df`` are skipped.
    Non-string values are converted to their string form. Missing values
    (NaN / None) are left missing rather than being turned into text.

    Args:
        df: Frame to clean. It is modified in place.
        columns: Names of the columns to normalize.

    Returns:
        The same ``df`` object, for call chaining.
    """
    for col in columns:
        if col not in df.columns:
            continue
        values = df[col]
        cleaned = values.astype(str).str.lower().str.strip()
        # astype(str) renders missing cells as the literal text "nan"/"none";
        # mask them back to NaN so they do not end up as words in the output.
        df[col] = cleaned.where(values.notna())
    return df

def normalize_numeric(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """Coerce the given columns to float, using 0 for unparseable or missing cells.

    Names in ``columns`` that ``df`` does not contain are ignored. ``df`` is
    modified in place and also returned.
    """
    present = [name for name in columns if name in df.columns]
    for name in present:
        # errors="coerce" turns anything non-numeric into NaN, which is then zeroed.
        parsed = pd.to_numeric(df[name], errors="coerce")
        df[name] = parsed.fillna(0).astype(float)
    return df

def apply_translations(df: pd.DataFrame, columns: list[str], translations: dict) -> pd.DataFrame:
    """Replace cell values in the given columns using the ``translations`` mapping.

    Each key of ``translations`` is swapped for its value wherever it appears
    as a cell value; cells without a matching key are left untouched. Columns
    not present in ``df`` are skipped. ``df`` is modified in place and returned.
    """
    targets = (name for name in columns if name in df.columns)
    for name in targets:
        translated = df[name].replace(translations)
        df[name] = translated
    return df

def normalize_reserved(df: pd.DataFrame, col: str = "reserved") -> pd.DataFrame:
    """Map the ``col`` column through RESERVED_MAPPING, defaulting to "no".

    Values without an entry in RESERVED_MAPPING come back from ``map`` as NaN
    and are then filled with "no". If ``df`` has no such column it is returned
    unchanged. ``df`` is modified in place and also returned.
    """
    if col not in df.columns:
        return df
    mapped = df[col].map(RESERVED_MAPPING)
    df[col] = mapped.fillna("no")
    return df

# -------------------------------
# STEP 2: Load Data
# -------------------------------
# Read both supplier workbooks from the current working directory.
df1 = pd.read_excel("supplier_data1.xlsx")
df2 = pd.read_excel("supplier_data2.xlsx")

# -------------------------------
# STEP 3: Rename Columns
# -------------------------------
# Bring both suppliers onto one shared, snake_case column vocabulary.
df1 = df1.rename(columns={
    "Quality/Choice": "quality",
    "Grade": "grade",
    "Finish": "finish",
    "Thickness (mm)": "thickness_mm",
    "Width (mm)": "width_mm",
    "Description": "description",
    "Gross weight (kg)": "weight_kg",
    "Quantity": "quantity",
    "RP02": "rp02",
    "RM": "rm",
    "AG": "ag",
    "AI": "ai"
})

df2 = df2.rename(columns={
    "Material": "grade",
    "Description": "finish",
    "Article ID": "article_id",
    "Weight (kg)": "weight_kg",
    "Quantity": "quantity",
    "Reserved": "reserved"
})
# Supplier 2's "Description" is used for both the finish and description columns.
df2["description"] = df2["finish"]

# -------------------------------
# STEP 4: Standardize values
# -------------------------------
df1 = normalize_strings(df1, STRING_COLUMNS)
df2 = normalize_strings(df2, STRING_COLUMNS)

df1 = normalize_numeric(df1, NUMERIC_COLUMNS)
df2 = normalize_numeric(df2, NUMERIC_COLUMNS)

df1 = normalize_reserved(df1)
df2 = normalize_reserved(df2)

# -------------------------------
# STEP 5: Concatenate datasets
# -------------------------------
# Columns that exist in only one frame come out of concat as NaN for the
# other frame's rows.
# NOTE(review): numeric columns were zero-filled per frame *before* the concat,
# so numeric columns missing from one supplier stay NaN here - confirm intended.
inventory_dataset = pd.concat([df1, df2], ignore_index=True)

# Fill missing essential columns
inventory_dataset["reserved"] = inventory_dataset["reserved"].fillna("no")
# DataFrame.get() only falls back to its default when the column is absent, so
# rows from a supplier without article ids would otherwise keep NaN. Create the
# column if needed, then fill every missing id with the placeholder.
if "article_id" not in inventory_dataset.columns:
    inventory_dataset["article_id"] = "not available"
else:
    inventory_dataset["article_id"] = (
        inventory_dataset["article_id"].astype(object).fillna("not available")
    )
inventory_dataset = apply_translations(inventory_dataset, ["finish", "description"], TRANSLATIONS)
# Keep only the agreed output columns, in the agreed order.
inventory_dataset = inventory_dataset.reindex(columns=COLUMN_ORDER)

# -------------------------------
# STEP 6: Save result
# -------------------------------
inventory_dataset.to_csv("inventory_dataset.csv", index=False)
print("✅ inventory_dataset.csv has been created successfully!")


# Output: ✅ inventory_dataset.csv has been created successfully!
