# 04 - Value Proxy + Action List
Estimate a future-revenue value proxy for each customer, combine it with the predicted churn probability, and export a recommended action list.

In [None]:
import sys
from pathlib import Path

# Use the current working directory as the project root and register it on the
# import path exactly once, so packages located there (this notebook imports
# `src` further down) can be resolved.
PROJECT_ROOT = Path.cwd().resolve()
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))


In [None]:
import pandas as pd

from src import config
from src.cleaning import clean_transactions
from src.features import build_customer_features, compute_future_revenue, build_time_based_labels
from src.io import load_transactions_excel
from src.modeling import build_churn_models, build_value_models, select_feature_columns, predict_churn_probabilities
from src.utils import ensure_dirs, set_random_seed

# Seed with the configured random state and hand the reports directory to
# ensure_dirs before anything is written there.
set_random_seed(config.RANDOM_STATE)
ensure_dirs([config.REPORTS_DIR])

# Load the raw transactions and pass them through the cleaning step.
raw_df = load_transactions_excel()
clean_df = clean_transactions(raw_df)

# Time-based split: the snapshot is the latest invoice date in the cleaned
# data; the cutoff lies a configured number of days before it.
snapshot_date = clean_df['InvoiceDate'].max()
cutoff_date = snapshot_date - pd.Timedelta(
    days=config.TIME_SPLIT.cutoff_days_before_snapshot
)

# Features are built only from transactions dated on or before the cutoff,
# while the label helper receives the full cleaned data plus both dates.
train_df = clean_df.loc[clean_df['InvoiceDate'].le(cutoff_date)].copy()
features = build_customer_features(train_df)
labels = build_time_based_labels(clean_df, cutoff_date, snapshot_date)

# Attach the labels as a 'churned' column. The labels are matched through
# their index against the CustomerID column; with merge's default inner join,
# customers without a label are dropped.
features = pd.merge(
    features,
    labels.rename('churned'),
    left_on='CustomerID',
    right_index=True,
)

# Split the feature table into model inputs and the churn target.
# NOTE(review): 'churned' is already a column of `features` at this point;
# confirm select_feature_columns leaves the label out of the returned lists.
numeric_features, categorical_features = select_feature_columns(features)
X = features.loc[:, numeric_features + categorical_features]
y = features['churned']

# Take the 'hist_gradient_boosting' entry from the churn model collection and
# fit it on the full feature table.
churn_candidates = build_churn_models(numeric_features, categorical_features)
churn_model = churn_candidates['hist_gradient_boosting']
churn_model.fit(X, y)

# NOTE(review): the model is scored on the same X it was just fitted on, so
# these churn probabilities are in-sample.
features['churn_probability'] = predict_churn_probabilities(churn_model, X)

# Name of the future-revenue target column for the default prediction window.
revenue_col = f'future_revenue_{config.DEFAULT_PREDICTION_WINDOW_DAYS}d'

# Left-join the future revenue onto the feature table; customers with no match
# get a revenue of 0 instead of NaN.
future_revenue = compute_future_revenue(clean_df, cutoff_date)
features = pd.merge(features, future_revenue, on='CustomerID', how='left')
features[revenue_col] = features[revenue_col].fillna(0)

# Fit the 'ridge' value model on the future-revenue target and use its
# predictions as the value score.
# NOTE(review): X was sliced before the merge above, so the fit presumably
# pairs rows by position; this relies on the left merge neither reordering nor
# duplicating rows (confirm future_revenue has one row per CustomerID).
value_candidates = build_value_models(numeric_features, categorical_features)
value_model = value_candidates['ridge']
value_model.fit(X, features[revenue_col])
features['value_score'] = value_model.predict(X)

# Cut-offs for the segmentation: a fixed 0.5 churn probability for "high
# risk", and the median value score for "high value".
risk_threshold = 0.5
value_threshold = features['value_score'].median()

def segment_row(row):
    """Return the risk/value segment label for one customer row.

    Args:
        row: Any object indexable by 'churn_probability' and 'value_score'
            (for example a DataFrame row passed by ``apply(axis=1)``).

    Returns:
        One of 'High risk + High value', 'High risk + Low value',
        'Low risk + High value' or 'Low risk + Low value'.

    The cut-offs are read from the module-level ``risk_threshold`` and
    ``value_threshold``; a score equal to its threshold counts as "High".
    """
    labels_by_flags = {
        (True, True): 'High risk + High value',
        (True, False): 'High risk + Low value',
        (False, True): 'Low risk + High value',
        (False, False): 'Low risk + Low value',
    }
    is_risky = bool(row['churn_probability'] >= risk_threshold)
    is_valuable = bool(row['value_score'] >= value_threshold)
    return labels_by_flags[(is_risky, is_valuable)]

# Recommended action for each of the four risk/value segments.
segment_actions = {
    'High risk + High value': 'Retention incentive + personal outreach',
    'High risk + Low value': 'Low-cost reactivation email',
    'Low risk + High value': 'Loyalty/upsell offer',
    'Low risk + Low value': 'Nurture / standard campaigns',
}

# Label every customer using whole-column comparisons. This produces the same
# labels as calling segment_row on each row, but without a Python-level call
# per customer (and without apply's DataFrame-shaped result on an empty
# frame). Comparisons against NaN are False, so a missing score falls on the
# "Low" side, exactly as it does in segment_row.
high_risk = features['churn_probability'] >= risk_threshold
high_value = features['value_score'] >= value_threshold
features['segment'] = 'Low risk + Low value'
features.loc[~high_risk & high_value, 'segment'] = 'Low risk + High value'
features.loc[high_risk & ~high_value, 'segment'] = 'High risk + Low value'
features.loc[high_risk & high_value, 'segment'] = 'High risk + High value'

# Every segment label is a key of segment_actions, so the lookup is total.
features['recommended_action'] = features['segment'].map(segment_actions)

# Keep only the columns needed to act on, ordered by segment name and, within
# each segment, from the highest churn probability to the lowest.
action_list = features[[
    'CustomerID',
    'churn_probability',
    'value_score',
    'segment',
    'recommended_action',
]].sort_values(['segment', 'churn_probability'], ascending=[True, False])

# Write the list to the reports directory; the trailing expression shows the
# first rows as the notebook cell output.
action_list.to_csv(config.REPORTS_DIR / 'customer_action_list.csv', index=False)
action_list.head()
