In [39]:
import pandas as pd
from lifetimes.utils import calibration_and_holdout_data
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter 
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load transactional data
transactions_df = pd.read_csv(r"C:\Users\DYNABOOK\Desktop\cltv_implementation\cltv_implementation\CLTV_Synthetic_Data\Transactional.csv")
transactions_df['purchase_date'] = pd.to_datetime(transactions_df['purchase_date'], errors="coerce")

# Define time periods
min_date = transactions_df['purchase_date'].min()
train_end_date = min_date + pd.Timedelta(days=60)
test_end_date = train_end_date + pd.Timedelta(days=30)

# Create calibration and holdout summary
summary = calibration_and_holdout_data(
    transactions_df,
    customer_id_col='user_id',
    datetime_col='purchase_date',
    monetary_value_col='Total_amount',
    calibration_period_end=train_end_date,
    observation_period_end=test_end_date,
    freq='D'
)

# Fit BG/NBD on training data
bgf = BetaGeoFitter(penalizer_coef=0.001)
bgf.fit(summary['frequency_cal'], summary['recency_cal'], summary['T_cal'])

# BG/NBD predictions
# Predict purchases during training period
summary['predicted_train_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    summary['T_cal'],
    summary['frequency_cal'],
    summary['recency_cal'],
    summary['T_cal']
)
summary['T'] = summary['T_cal'] + summary['duration_holdout']
summary = summary.dropna(subset=['frequency_cal', 'predicted_train_purchases'])
# Predict purchases in 1-month test period
summary['predicted_test_purchases'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    summary['T'] - summary['T_cal'],  # time after calibration
    summary['frequency_cal'],
    summary['recency_cal'],
    summary['T_cal']
)


# BG/NBD Accuracy
# Train accuracy (frequency during training)
bg_train_rmse = mean_squared_error(
    summary['frequency_cal'],
    summary['predicted_train_purchases'],
    squared=False
)
bg_train_mae = mean_absolute_error(
    summary['frequency_cal'],
    summary['predicted_train_purchases']
)
summary = summary.dropna(subset=['frequency_holdout', 'predicted_test_purchases'])

# Test accuracy (frequency during holdout)
bg_test_rmse = mean_squared_error(
    summary['frequency_holdout'],
    summary['predicted_test_purchases'],
    squared=False
)
bg_test_mae = mean_absolute_error(
    summary['frequency_holdout'],
    summary['predicted_test_purchases']
)

print("📦 BG/NBD Accuracy:")
print(f"Train RMSE: {bg_train_rmse:.4f}")
print(f"Train MAE:  {bg_train_mae:.4f}")
print(f"Test RMSE:  {bg_test_rmse:.4f}")
print(f"Test MAE:   {bg_test_mae:.4f}")


# Filter for Gamma-Gamma eligibility
ggf = GammaGammaFitter(penalizer_coef=0.01)
ggf.fit(
    frequency=summary['frequency_cal'],
    monetary_value=summary['monetary_value_cal']
)

# Predict expected average profit (monetary value)
summary['predicted_monetary'] = ggf.conditional_expected_average_profit(
    summary['frequency_cal'],
    summary['monetary_value_cal']
)

# Predict train and test revenue using predicted average monetary value
summary['predicted_train_revenue'] = summary['predicted_train_purchases'] * summary['predicted_monetary']
summary['predicted_test_revenue'] = summary['predicted_test_purchases'] * summary['predicted_monetary']

# Actual training revenue
summary['actual_train_revenue'] = summary['frequency_cal'] * summary['monetary_value_cal']

# 🔁 Compute actual test revenue from transaction data if not already in `summary`
# This assumes you have a `transactions_holdout` DataFrame available
# and your index is customer_id
holdout_agg = (
    transactions_df
    .groupby('user_id')
    .agg(
        frequency_holdout=('Total_amount', 'count'),
        total_monetary_holdout=('Total_amount', 'sum')
    ).reset_index()
)
holdout_agg['monetary_holdout'] = (
    holdout_agg['total_monetary_holdout'] / holdout_agg['frequency_holdout']
)

# Merge into summary
summary = summary.merge(
    holdout_agg[['user_id','frequency_holdout', 'monetary_holdout']],
    how='left',
    on='user_id'
)


# Now compute actual test revenue
summary['actual_test_revenue'] = summary['frequency_holdout_y'] * summary['monetary_holdout']

# Train accuracy
gg_train_rmse = mean_squared_error(
    summary['actual_train_revenue'],
    summary['predicted_train_revenue'],
    squared=False
)
gg_train_mae = mean_absolute_error(
    summary['actual_train_revenue'],
    summary['predicted_train_revenue']
)

# Test accuracy
gg_test_rmse = mean_squared_error(
    summary['actual_test_revenue'],
    summary['predicted_test_revenue'],
    squared=False
)
gg_test_mae = mean_absolute_error(
    summary['actual_test_revenue'],
    summary['predicted_test_revenue']
)

# Final output
print("💰 Gamma-Gamma Accuracy:")
print(f"Train RMSE: {gg_train_rmse:.4f}")
print(f"Train MAE:  {gg_train_mae:.4f}")
print(f"Test RMSE:  {gg_test_rmse:.4f}")
print(f"Test MAE:   {gg_test_mae:.4f}")

  transactions_df['purchase_date'] = pd.to_datetime(transactions_df['purchase_date'], errors="coerce")
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


📦 BG/NBD Accuracy:
Train RMSE: 0.7823
Train MAE:  0.5464
Test RMSE:  1.0850
Test MAE:   0.8158
💰 Gamma-Gamma Accuracy:
Train RMSE: 361.6204
Train MAE:  242.6876
Test RMSE:  1469.9664
Test MAE:   1200.3320
