# In [None]:
# 📁 01_generate_segmentation_data.ipynb

# ## 1. Introduction
"""
Generate synthetic customer data for segmentation based on RFM and telco usage patterns.
"""

# ## 2. Imports
from pathlib import Path

import numpy as np
import pandas as pd

# ## 3. Configuration
# Fixed seed so every run reproduces the same dataset.
# NOTE: all draws below come from NumPy's global RNG, so the ORDER of the
# sampling calls determines which values each feature receives. Reordering
# or inserting a draw changes every feature generated after it.
np.random.seed(42)
n = 1000  # number of synthetic customers to generate

# ## 4. Generate Features
# Sequential, zero-padded identifiers (CUST0000, CUST0001, ...).
customer_id = [f"CUST{i:04d}" for i in range(n)]

# --- RFM features ---
# randint's upper bound is exclusive, so recency falls in [1, 99].
recency = np.random.randint(1, 100, size=n)
frequency = np.random.poisson(5, size=n)
monetary = np.round(np.random.normal(300, 100, size=n), 2)
# The lower tail of normal(300, 100) can yield tiny or negative amounts;
# floor them at 20.
monetary = np.clip(monetary, 20, None)

# --- Telco usage features ---
avg_call_duration = np.round(np.random.normal(5, 2, size=n), 2)
# normal(5, 2) falls below zero for roughly 0.6% of draws, which is not a
# valid duration. Floor at 0, mirroring the floor applied to `monetary`.
# Clipping consumes no random numbers, so the features below are unaffected.
avg_call_duration = np.clip(avg_call_duration, 0, None)
num_support_tickets = np.random.poisson(1.2, size=n)
data_usage_gb = np.round(np.random.exponential(2, size=n), 2)
# Upper bound exclusive: weekly app opens fall in [0, 19].
app_opens_weekly = np.random.randint(0, 20, size=n)

# ## 5. Assemble DataFrame
# Map each output column name to its generated feature sequence. The dict's
# insertion order is the column order of the resulting DataFrame.
feature_columns = dict(
    customer_id=customer_id,
    recency_days=recency,
    frequency=frequency,
    monetary_value=monetary,
    avg_call_duration_min=avg_call_duration,
    num_support_tickets=num_support_tickets,
    data_usage_gb=data_usage_gb,
    app_opens_weekly=app_opens_weekly,
)

# One row per customer, one column per feature.
df_seg = pd.DataFrame(feature_columns)

# ## 6. Save Dataset
# Destination is relative to the current working directory.
output_path = Path("../data/segmentation_customers.csv")
# to_csv does not create missing directories and raises OSError if the
# target folder is absent, so make sure it exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the positional row index out of the CSV; customer_id
# already identifies each row.
df_seg.to_csv(output_path, index=False)
# Trailing expression: shows the first rows as a preview when run in a notebook.
df_seg.head()
