# Cohort Retention Analysis
This notebook loads the synthetic lifecycle dataset, builds weekly cohorts, and visualizes player retention.

In [None]:
# Import core analysis libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

# Apply one shared look to every figure: white grid background, blue palette
sns.set_theme(palette='Blues', style='whitegrid')


In [None]:
# Point at ../data/synthetic_user_events.csv; the relative path is resolved against
# the current working directory (normally the notebook's folder when run in Jupyter)
data_path = Path('..').joinpath('data', 'synthetic_user_events.csv')

# Read the event log, converting both date columns to datetimes so the
# week arithmetic in the cohort step can use the .dt accessor
df = pd.read_csv(
    data_path,
    parse_dates=['registration_date', 'event_date'],
)

# Preview the leading rows as a quick sanity check on the loaded columns
df.head()


In [None]:
# Build the weekly cohort retention table from the event log in `df`.
# Reads:  df['user_id'], df['registration_date'], df['event_date'] (datetime columns).
# Writes: df['registration_week'], df['event_week'], df['cohort_index'], plus the
#         cohort_sizes, weekly_activity, retention_matrix and retention_rates objects.

# Align registration and event timestamps to the Monday of their week
# (Series.dt.weekday is 0 for Monday). normalize() first strips any time-of-day
# component; without it, two users registering in the same week at different
# times would get different week keys and be split into separate cohorts.
registration_day = df['registration_date'].dt.normalize()
event_day = df['event_date'].dt.normalize()
df['registration_week'] = registration_day - pd.to_timedelta(registration_day.dt.weekday, unit='D')
df['event_week'] = event_day - pd.to_timedelta(event_day.dt.weekday, unit='D')

# Calculate how many distinct users registered each week (cohort size)
cohort_sizes = (
    df[['user_id', 'registration_week']]
    .drop_duplicates()
    .groupby('registration_week')
    .size()
)

# Determine the whole-week offset between each event and the player's registration
# (0 = the registration week itself)
df['cohort_index'] = ((df['event_week'] - df['registration_week']) / np.timedelta64(1, 'W')).round().astype(int)

# Count unique active users by cohort and week offset
weekly_activity = (
    df.groupby(['registration_week', 'cohort_index'])['user_id']
    .nunique()
    .reset_index(name='active_users')
)

# Pivot into a retention matrix (cohorts x weeks) restricted to offsets 0..11.
# reindex() guarantees every one of the 12 week columns is present: an offset
# with no activity in any cohort would otherwise be missing entirely, and the
# heatmap axis would silently skip that week. Offsets outside 0..11 are dropped.
retention_matrix = (
    weekly_activity.pivot_table(
        index='registration_week',
        columns='cohort_index',
        values='active_users'
    )
    .sort_index()
    .reindex(columns=range(12))
)

# Convert counts to retention fractions by dividing each row by its cohort size;
# cohort/week cells with no observed activity become 0
retention_rates = retention_matrix.div(cohort_sizes, axis=0).fillna(0)
retention_rates.head()


In [None]:
# Plot a heatmap to visualize the retention decay across cohorts.
# The cohort index holds datetimes, which seaborn would render as raw
# nanosecond timestamps; plot a copy whose index is formatted as YYYY-MM-DD
# so `retention_rates` itself is left unchanged.
heatmap_data = retention_rates.copy()
heatmap_data.index = heatmap_data.index.strftime('%Y-%m-%d')

plt.figure(figsize=(12, 6))
heatmap = sns.heatmap(
    heatmap_data,
    annot=True,
    fmt='.0%',
    cmap='Blues',
    cbar_kws={'label': 'Retention'}
)
plt.title('Weekly Retention by Registration Cohort')
plt.xlabel('Weeks Since Registration')
plt.ylabel('Registration Week')
plt.tight_layout()

# Save the heatmap to the reports directory and display it inline.
# The path is relative (mirroring how the dataset is located via '..') so the
# notebook is not tied to one machine's home directory.
# NOTE(review): assumes the notebook runs from a folder one level below the
# project root, next to data/ and reports/ -- confirm.
output_path = Path('..') / 'reports' / 'retention_heatmap.png'
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, dpi=300)
plt.show()
print(f'Saved heatmap to {output_path.resolve()}')
