In [None]:
 
def _booking_cohort_counts(df):
    """Count unique BOOKING customers per (join month, month offset).

    Expects ``df`` to already carry the ``first_join_month`` and
    ``month_diff`` columns written by ``retention``.

    Returns:
        A DataFrame indexed by ``first_join_month`` with one column per
        ``month_diff`` value, or an empty DataFrame when no BOOKING row has
        a usable month offset.
    """
    bookings = df.loc[
        df['event_name'] == 'BOOKING',
        ['first_join_month', 'month_diff', 'customer_id'],
    ]
    # Rows whose join or event date could not be parsed have a NaN offset
    # and cannot be assigned to a cohort cell.
    bookings = bookings.dropna(subset=['month_diff'])
    if bookings.empty:
        return pd.DataFrame()

    # A NaN anywhere in the column makes it float; cast back so the matrix
    # columns are integer month offsets (0, 1, 2, ...).
    bookings = bookings.assign(month_diff=bookings['month_diff'].astype('int64'))
    counts = bookings.groupby(['first_join_month', 'month_diff'])['customer_id'].nunique()
    return counts.unstack('month_diff')


def retention(df):
    """Compute and plot the monthly cohort retention of BOOKING events.

    Customers are grouped into cohorts by the month of ``first_join_date``.
    For each cohort, the number of unique customers with a BOOKING event in
    every subsequent month is divided by the number of unique customers who
    booked in the join month itself (month offset 0).

    Side effect: the columns ``first_join_month``, ``event_month`` and
    ``month_diff`` are added to (or overwritten on) ``df``.

    Args:
        df: DataFrame with the columns ``first_join_date``, ``event_time``,
            ``event_name`` and ``customer_id``.

    Returns:
        DataFrame of retention rates indexed by ``first_join_month`` with one
        column per month offset. Rows and columns that are entirely NaN are
        removed. When there is nothing to report (no BOOKING events, or no
        cohort with bookings in its join month) an empty DataFrame is
        returned and no figure is drawn.
    """
    join_ts = pd.to_datetime(df['first_join_date'])
    event_ts = pd.to_datetime(df['event_time'])

    df['first_join_month'] = join_ts.dt.to_period('M')
    df['event_month'] = event_ts.dt.to_period('M')

    # Whole-month distance computed column-wise; unlike subtracting Period
    # values and reading `.n` per row, this yields NaN (not an exception)
    # for missing dates.
    df['month_diff'] = (
        (event_ts.dt.year - join_ts.dt.year) * 12
        + (event_ts.dt.month - join_ts.dt.month)
    )

    retention_matrix = _booking_cohort_counts(df)
    if retention_matrix.empty:
        return retention_matrix

    # Select the baseline by label, not by position: when some bookings
    # precede the join month (negative offsets) or nobody booked in month 0,
    # the first column is not the month-0 column. Cohorts without month-0
    # bookings get a NaN baseline and are dropped below.
    cohort_sizes = retention_matrix.reindex(columns=[0])[0]
    retention_rate = retention_matrix.divide(cohort_sizes, axis=0)

    retention_rate = retention_rate.dropna(how='all', axis=0).dropna(how='all', axis=1)
    if retention_rate.empty:
        # Nothing to draw; a heatmap of an empty frame would fail.
        return retention_rate

    # Plot the heatmap
    # NOTE(review): the axis labels say "First Purchase" although cohorts are
    # keyed on first_join_date — confirm the intended wording.
    plt.figure(figsize=(60, 10))
    sns.heatmap(retention_rate, annot=True, fmt=".2", cmap="YlGnBu", vmin=0.0, vmax=1.0)

    plt.title('2_Monthly Cohort Retention')
    plt.xlabel('Months Since First Purchase')
    plt.ylabel('First Purchase Month')
    plt.show()

    return retention_rate
