# Customer Lifetime Value (LTV) Prediction

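This notebook demonstrates how to predict Customer Lifetime Value (LTV) using a synthetic e-commerce transactions dataset. We engineer per-customer features (recency, tenure, frequency, and average order value), train a random forest regression model, evaluate it with MAE and RMSE, and segment customers by predicted LTV.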

In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt



ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the raw transactions and convert the invoice timestamp column to datetime
df = pd.read_csv('transactions_sample.csv')
df = df.assign(InvoiceDate=pd.to_datetime(df['InvoiceDate']))
df.head()

In [None]:
# Build one row of features per customer from the transaction rows.

# Row-level revenue: quantity times unit price
df['TotalAmount'] = df['Quantity'] * df['UnitPrice']

# Reference date for recency: one day after the latest invoice, so the most
# recently active customer gets Recency == 1 rather than 0.
snapshot_date = df['InvoiceDate'].max() + pd.Timedelta(days=1)

# Named aggregation yields the final column names directly instead of relying
# on the positional order of a dict/list spec.
customer_df = df.groupby('CustomerID').agg(
    # Days between the customer's last invoice and the snapshot date
    Recency=('InvoiceDate', lambda dates: (snapshot_date - dates.max()).days),
    # Days between the customer's first and last invoice
    Tenure=('InvoiceDate', lambda dates: (dates.max() - dates.min()).days),
    # Distinct invoices, not rows: 'count' would count an invoice once per row
    # it spans (assumes rows are invoice line items, as the per-row
    # Quantity/UnitPrice columns suggest -- TODO confirm against the data).
    Frequency=('InvoiceNo', 'nunique'),
    # Total spend across all of the customer's rows
    Monetary=('TotalAmount', 'sum'),
)

# Average order value: total spend divided by number of distinct invoices
customer_df['AOV'] = customer_df['Monetary'] / customer_df['Frequency']
customer_df.head()

In [None]:
# Train a regression model
# NOTE(review): the target Monetary equals Frequency * AOV by construction
# (AOV is computed as Monetary / Frequency), and both are used as features, so
# the model can reconstruct the target from its inputs. The metrics below are
# therefore optimistic; a proper LTV setup would take the target from a later
# time window than the features.
feature_cols = ['Recency', 'Tenure', 'Frequency', 'AOV']
X = customer_df[feature_cols]
y = customer_df['Monetary']

# Hold out 20% of customers for evaluation; the split is seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well: without random_state the bootstrap samples and
# feature draws change on every run, so the metrics would not be reproducible
# even though the split is fixed.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)


In [None]:
# Evaluate the model on the held-out customers
mae = mean_absolute_error(y_test, predictions)

# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and is not accepted by newer releases,
# whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))

print(f'MAE: {mae:.2f}')
print(f'RMSE: {rmse:.2f}')

In [None]:
# Score every customer in the feature table (this includes the rows the model
# was fitted on) and split the scores into three equal-frequency tiers.
segment_names = ['Low', 'Medium', 'High']
ltv_scores = model.predict(X)

customer_df['Predicted_LTV'] = ltv_scores
customer_df['Segment'] = pd.qcut(
    customer_df['Predicted_LTV'],
    q=len(segment_names),
    labels=segment_names,
)
customer_df.head()