# Feature Engineering

This notebook creates business and operational metrics required for
analyzing sales performance, inventory efficiency, and shipping reliability
in a Just-In-Time supply chain.


In [29]:
import pandas as pd
import numpy as np

# Load the three cleaned tables produced upstream, in the order
# orders / inventory / fulfillment.
# NOTE(review): `fulfillment` is not referenced by the feature cells shown in
# this notebook — confirm whether it is still needed.
orders, inventory, fulfillment = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/orders_clean.csv",
        "../data/processed/inventory_clean.csv",
        "../data/processed/fulfillment_clean.csv",
    )
)


In [30]:
# CSV round-trips dates as text; convert both date columns to datetime so the
# `.dt` accessor can be used on them later.
for date_col in ("order_date", "shipment_date"):
    orders[date_col] = pd.to_datetime(orders[date_col])


### 1️⃣ Net Sales

In [31]:
# Net sales = gross sales with the discount removed.
# NOTE(review): assumes "Discount %" is stored as a fraction (0.10 for 10 %),
# not as whole percentage points — confirm against the cleaning notebook.
discount_multiplier = 1 - orders["Discount %"]
orders["net_sales"] = orders["Gross Sales"].mul(discount_multiplier)


### 2️⃣ Unit Price

In [32]:
# Pre-discount price per unit: gross sales divided by the ordered quantity.
# A zero "Order Quantity" makes the division yield +/-inf. Convert those to NaN,
# the same treatment this notebook applies to inventory_coverage_months, so the
# infinities do not distort later aggregates or reach the exported CSV.
orders["unit_price"] = (
    orders["Gross Sales"] / orders["Order Quantity"]
).replace([np.inf, -np.inf], np.nan)


### 3️⃣ Profit Margin (Company-level KPI)

In [33]:
# Company-level margin: total profit over total net sales, computed from the
# column totals rather than as an average of per-order margins.
total_profit, total_net_sales = orders["Profit"].sum(), orders["net_sales"].sum()
profit_margin = total_profit / total_net_sales

# Bare expression so the notebook displays the KPI.
profit_margin


np.float64(0.7570341623694179)

In [34]:
# Calendar features derived from the order date: year, month number, and a
# monthly period rendered as a string, used as a grouping key further down.
order_ts = orders["order_date"]
orders["order_year"] = order_ts.dt.year
orders["order_month"] = order_ts.dt.month
orders["order_year_month"] = order_ts.dt.to_period("M").astype(str)


In [35]:
# Binary late flag: 1 when the actual shipping time exceeds the scheduled number
# of shipment days, 0 otherwise. A comparison involving NaN evaluates to False,
# so rows missing either value are flagged 0.
exceeds_schedule = orders["shipping_time"].gt(orders["Shipment Days - Scheduled"])
orders["late_shipment"] = exceeds_schedule.astype(int)


In [36]:
# The mean of a 0/1 flag is the share of orders flagged late.
late_flags = orders["late_shipment"]
late_shipment_rate = late_flags.mean()
late_shipment_rate


np.float64(0.4318085147519431)

In [37]:
# Storage cost per inventory row: units held in the warehouse times the
# per-unit inventory cost.
inventory["storage_cost"] = inventory["Warehouse Inventory"].mul(
    inventory["Inventory Cost Per Unit"]
)


In [38]:
# Total units ordered per product per calendar month, returned as a flat frame
# with one row for each (product, month) pair that has at least one order.
demand_keys = ["Product Name", "order_year_month"]
units_by_product_month = orders.groupby(demand_keys)["Order Quantity"].sum()
monthly_demand = units_by_product_month.reset_index()


In [39]:
# Average the monthly totals for each product, giving one row per product.
# NOTE(review): a month in which a product had no orders has no row in
# `monthly_demand`, so it is not counted as zero in this average — confirm that
# averaging over active months only is intended.
mean_units_per_product = monthly_demand.groupby("Product Name")["Order Quantity"].mean()
avg_monthly_demand = mean_units_per_product.rename("avg_monthly_demand").reset_index()


In [40]:
# Attach each product's average monthly demand to its inventory rows. The left
# join keeps every inventory row; products with no orders get NaN.
#
# Any existing copy of the column is dropped first so the cell can be re-run
# safely. Merging a second time would otherwise create avg_monthly_demand_x / _y
# and the coverage calculation that reads "avg_monthly_demand" would then fail.
inventory = inventory.drop(columns=["avg_monthly_demand"], errors="ignore").merge(
    avg_monthly_demand,
    on="Product Name",
    how="left",
    # Raise instead of silently duplicating inventory rows if the demand table
    # ever holds more than one row per product.
    validate="many_to_one",
)


In [41]:
# Months of stock on hand: warehouse inventory divided by average monthly demand.
# Division by a zero demand yields +/-inf; those are replaced with NaN so the
# column holds only finite values or missing markers.
coverage_ratio = inventory["Warehouse Inventory"].div(inventory["avg_monthly_demand"])
inventory["inventory_coverage_months"] = coverage_ratio.replace(
    [np.inf, -np.inf], np.nan
)

In [42]:
# Persist the enriched tables without the pandas index column.
for feature_frame, out_path in (
    (orders, "../data/processed/orders_features.csv"),
    (inventory, "../data/processed/inventory_features.csv"),
):
    feature_frame.to_csv(out_path, index=False)


## Feature Engineering Summary

- Created net sales, unit price, and profit margin metrics
- Derived time-based features for trend analysis
- Built shipping performance indicators including late shipment flag and rate
- Calculated inventory storage cost
- Introduced an inventory coverage ratio (warehouse inventory ÷ average monthly demand, expressed in months) to assess overstock and understock risk
