# 02 - RFM Segmentation & Forecasting

## Objective
Segment customers using RFM (Recency, Frequency, Monetary) and forecast monthly sales.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA

# Load dataset
df = pd.read_csv('../data/cleaned/cleaned_orders.csv')
df['order_date'] = pd.to_datetime(df['order_date'])

## RFM Segmentation

In [None]:

snapshot_date = df['order_date'].max() + pd.Timedelta(days=1)
rfm = df.groupby('customer_id').agg({
    'order_date': lambda x: (snapshot_date - x.max()).days,
    'order_id': 'count',
    'sales': 'sum'
}).rename(columns={
    'order_date': 'Recency',
    'order_id': 'Frequency',
    'sales': 'MonetaryValue'
})

# Score each column
rfm['R'] = pd.qcut(rfm['Recency'], 4, labels=[4, 3, 2, 1]).astype(int)
rfm['F'] = pd.qcut(rfm['Frequency'].rank(method='first'), 4, labels=[1, 2, 3, 4]).astype(int)
rfm['M'] = pd.qcut(rfm['MonetaryValue'], 4, labels=[1, 2, 3, 4]).astype(int)

rfm['RFM_Segment'] = rfm['R'].astype(str) + rfm['F'].astype(str) + rfm['M'].astype(str)
rfm['RFM_Score'] = rfm[['R', 'F', 'M']].sum(axis=1)
rfm.head()
    

## RFM Segment Distribution

In [None]:

rfm['Segment'] = pd.cut(rfm['RFM_Score'], bins=[2, 5, 8, 11, 13], 
                        labels=['At Risk', 'Need Attention', 'Loyal', 'Champions'])
rfm['Segment'].value_counts().plot(kind='bar', title='Customer Segments', figsize=(8,4))
plt.ylabel('Customer Count')
plt.tight_layout()
plt.show()
    

## Sales Forecasting (ARIMA)

In [None]:

monthly_sales = df.set_index('order_date').resample('M')['sales'].sum()
model = ARIMA(monthly_sales, order=(1,1,1))
fitted = model.fit()
forecast = fitted.forecast(steps=6)

monthly_sales.plot(label='Historical', figsize=(10,5))
forecast.plot(label='Forecast', style='--')
plt.title('Monthly Sales Forecast')
plt.legend()
plt.show()
    