# Customer Segmentation

Aggregate each customer's total sales and total profit, then segment customers into Low, Medium, and High tiers on each metric.

In [None]:
# Import Library
from pathlib import Path

import pandas as pd

In [None]:
# Read the dataset CSV into a DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer="./data/DataCoSupplyChainDataset_Cleaned.csv")
print(df.head(n=5))

In [None]:
# Sum sales and profit per customer (one row per "Customer Id")
cust_seg = (
    df.groupby(by="Customer Id")[["Sales per customer", "Order Profit Per Order"]]
    .sum()
    .reset_index()
)
print(cust_seg)

<!-- Insight -->
The per-customer totals show that high sales do not always translate into high profit. For instance, Customer ID 4 recorded sales of 1480.71 but a profit of −439.98.

In [None]:
# Quantile levels (roughly the lower and upper terciles) at which the
# Low/Medium and Medium/High cut points are taken
qt = [0.33, 0.66]

<!-- Insight -->
Customers are divided into three segments—Low, Medium, and High—using thresholds at the 33rd and 66th percentiles of each metric.

In [None]:
# Threshold values of each metric at the quantile levels listed in qt
Sales_Seg, Profit_Seg = (
    cust_seg[metric].quantile(qt)
    for metric in ("Sales per customer", "Order Profit Per Order")
)

In [None]:
# Make Segmentation Function
def seg_val(value, quantiles):
    """Label a value as "Low", "Medium", or "High" relative to two cut points.

    Parameters
    ----------
    value : scalar
        The number to classify.
    quantiles : pandas.Series, dict, or sequence
        The two threshold values, e.g. the result of
        ``Series.quantile([0.33, 0.66])``. The smallest entry is used as the
        Low/Medium boundary and the largest as the Medium/High boundary, so
        the function works for any pair of quantile levels rather than only
        the labels 0.33 and 0.66.

    Returns
    -------
    str
        "Low" if ``value`` is at or below the lower threshold, "Medium" if it
        is at or below the upper threshold, otherwise "High".

    Notes
    -----
    A NaN ``value`` compares False against both thresholds and therefore
    falls through to "High".
    """
    # A dict iterates over its keys, so pull its values explicitly; a Series
    # or plain sequence already iterates over the threshold values.
    thresholds = quantiles.values() if isinstance(quantiles, dict) else quantiles
    lower, upper = min(thresholds), max(thresholds)

    if value <= lower:
        return "Low"
    if value <= upper:
        return "Medium"
    return "High"

In [None]:
# Label each customer's sales total and profit total with seg_val,
# passing the matching threshold Series as the second argument
cust_seg["Sales Segment"] = cust_seg["Sales per customer"].apply(
    seg_val, args=(Sales_Seg,)
)
cust_seg["Profit Segment"] = cust_seg["Order Profit Per Order"].apply(
    seg_val, args=(Profit_Seg,)
)
print(cust_seg)

In [None]:
# Save Customer Segmentation
# to_csv does not create missing folders, so make sure the output directory
# exists first; otherwise a fresh checkout fails on this cell with OSError.
output_path = Path("./powerbi/CustomerSegmentation_Summary.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
cust_seg.to_csv(output_path, index=False)