# Customer Lifetime Value (LTV) Prediction
This notebook demonstrates how to predict Customer Lifetime Value (LTV) using synthetic customer data. It covers data generation, feature exploration, model training, evaluation, and quantile-based customer segmentation.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Dataset
# Build a synthetic customer table. The global NumPy seed makes every draw
# below reproducible; the draws must stay in this order (Recency, Frequency,
# AOV, noise) or the generated values change.
np.random.seed(42)
n_customers = 500

customer_ids = ['C{:04d}'.format(idx) for idx in range(n_customers)]
recency = np.random.randint(1, 365, size=n_customers)    # integers in [1, 364]
frequency = np.random.poisson(10, size=n_customers)      # Poisson counts with mean 10 (0 is possible)
aov = np.round(np.random.uniform(10, 500, size=n_customers), 2)

data = pd.DataFrame({
    'Customer_ID': customer_ids,
    'Recency': recency,
    'Frequency': frequency,
    'AOV': aov,
})

# Synthetic target: Frequency * AOV, damped exponentially by Recency, plus
# Gaussian noise (sd 100). Despite the column name, this is the generated
# ground truth the model is trained on, not a model output. Because the noise
# is symmetric and the product can be small, some values may be negative.
decay = np.exp(-data['Recency'] / 365)
noise = np.random.normal(0, 100, n_customers)
data['Predicted_LTV'] = np.round(data['Frequency'] * data['AOV'] * decay + noise, 2)

# Quartile-based segments. np.select takes the first matching condition, so
# the thresholds must be listed from highest to lowest; anything below the
# 25th percentile falls through to the default.
ltv = data['Predicted_LTV']
conditions = [ltv >= ltv.quantile(q) for q in (0.75, 0.50, 0.25)]
choices = ['High', 'Medium', 'Low']
data['Segment'] = np.select(conditions, choices, default='Very Low')
data.head()

In [None]:
# Feature distribution
# Pairwise scatter plots (with per-column distributions on the diagonal) for
# the three input columns and the synthetic LTV target.
pairplot_columns = ['Recency', 'Frequency', 'AOV', 'Predicted_LTV']
sns.pairplot(data.loc[:, pairplot_columns])
plt.show()

In [None]:
# Data for model
# Inputs are the three customer-level columns; the target is the synthetic
# LTV column generated earlier in the notebook.
features = ['Recency', 'Frequency', 'AOV']
X = data.loc[:, features]
y = data.loc[:, 'Predicted_LTV']

# Hold out 20% of the customers for evaluation; the fixed random_state keeps
# the split (and therefore the reported metrics) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the fitted estimator, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
preds = model.predict(X_test)

# Error metrics on the held-out split only.
mae = mean_absolute_error(y_test, preds)
rmse = np.sqrt(mean_squared_error(y_test, preds))

for metric_name, metric_value in (("MAE", mae), ("RMSE", rmse)):
    print(f"{metric_name}: {metric_value:.2f}")

In [None]:
# Predictions
# The 'Predicted_LTV' column in `data` is the synthetic target the model was
# trained on, not model output, so exporting `data` as-is would save no
# predictions at all. Score every customer with the fitted model and export
# the result as an additional column; the existing columns are left untouched.
# NOTE: customers that were part of the training split are scored in-sample,
# so their predictions look more accurate than they would on unseen data.
predictions = data.assign(
    Model_Predicted_LTV=np.round(model.predict(data[features]), 2)
)
predictions.to_csv("ltv_predictions.csv", index=False)
print("Predictions saved to 'ltv_predictions.csv'")

### Summary
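A random forest regressor was trained on Recency, Frequency, and AOV to predict LTV, and evaluated on a held-out 20% test split.
- **MAE** (Mean Absolute Error): the average absolute difference between predicted and actual LTV.
- **RMSE** (Root Mean Squared Error): the square root of the average squared error; it penalizes large deviations more heavily than MAE.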

Customer segments were created from the quartiles of the `Predicted_LTV` column: High, Medium, Low, Very Low.