In [10]:
# Comprehensive A/B Test Analysis for Monetization Strategy
!pip install statsmodels
!pip install numpy==1.23.5 autograd==1.5.0 lifelines==0.27.0

# Revised A/B Test Analysis for Monetization Strategy

# Revised A/B Test Analysis for Monetization Strategy

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from datetime import datetime, timedelta
from scipy.stats import ttest_ind

# Set up basic plot aesthetics
plt.style.use('default')  # Use the default Matplotlib style
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3

# Set a color palette manually
color_palette = ['#1f77b4', '#ff7f0e']  # Blue and Orange

# Load and preprocess the data
def load_data(file_path):
    df = pd.read_json(file_path, lines=True)
    df['event_time'] = pd.to_datetime(df['event_time'], unit='ms')
    df['first_event_time'] = pd.to_datetime(df['first_event_time'], unit='ms')
    df['days_since_first_event'] = (df['event_time'] - df['first_event_time']).dt.total_seconds() / (24 * 60 * 60)
    return df

df = load_data('data/dataset_experiment.json')

# Continue with the rest of the analysis...
# 1. Data Overview and Cleaning
print("1. Data Overview and Cleaning")
print("-----------------------------")
print(df.info())
print("\nMissing values:")
print(df.isnull().sum())

# Remove any duplicate events
df = df.drop_duplicates()

# Ensure all numeric columns are properly typed
df['revenue'] = pd.to_numeric(df['revenue'], errors='coerce')

print("\nUnique event types:")
print(df['event_name'].value_counts())

# 2. Experiment Setup Analysis
print("\n2. Experiment Setup Analysis")
print("-----------------------------")
experiment_duration = (df['first_event_time'].max() - df['first_event_time'].min()).days
print(f"Experiment duration: {experiment_duration} days")

variant_counts = df['experiment_variant'].value_counts()
print("\nDistribution of experiment variants:")
print(variant_counts)

# Chi-square test for equal distribution
chi2, p_value = stats.chisquare(variant_counts)
print(f"\nChi-square test for equal distribution: p-value = {p_value:.4f}")

plt.figure()
variant_counts.plot(kind='bar', color=color_palette)
plt.title('Distribution of Experiment Variants')
plt.xlabel('Variant')
plt.ylabel('Count')
plt.tight_layout()
plt.show()


# 3. User Acquisition and Retention
print("\n3. User Acquisition and Retention")
print("----------------------------------")
df_users = df.groupby('user_id').agg({
    'first_event_time': 'min',
    'event_time': 'max',
    'experiment_variant': 'first'
}).reset_index()
df_users['retention_days'] = (df_users['event_time'] - df_users['first_event_time']).dt.total_seconds() / (24 * 60 * 60)

plt.figure(figsize=(12, 6))
sns.histplot(data=df_users, x='first_event_time', hue='experiment_variant', multiple='stack', bins=30)
plt.title('User Acquisition Over Time')
plt.xlabel('First Event Time')
plt.ylabel('Number of Users')
plt.show()

# 4. Conversion Analysis
print("\n4. Conversion Analysis")
print("----------------------")
def get_conversion_rate(df, variant):
    total_users = df[df['experiment_variant'] == variant]['user_id'].nunique()
    converted_users = df[(df['experiment_variant'] == variant) & (df['event_name'] == 'subscribe')]['user_id'].nunique()
    return converted_users / total_users

conv_rate_a = get_conversion_rate(df, 'A')
conv_rate_b = get_conversion_rate(df, 'B')

print(f"Conversion rate for Variant A: {conv_rate_a:.2%}")
print(f"Conversion rate for Variant B: {conv_rate_b:.2%}")

# Z-test for conversion rates
total_users_a = df[df['experiment_variant'] == 'A']['user_id'].nunique()
total_users_b = df[df['experiment_variant'] == 'B']['user_id'].nunique()
converted_users_a = df[(df['experiment_variant'] == 'A') & (df['event_name'] == 'subscribe')]['user_id'].nunique()
converted_users_b = df[(df['experiment_variant'] == 'B') & (df['event_name'] == 'subscribe')]['user_id'].nunique()

z_stat, p_value = stats.proportions_ztest([converted_users_a, converted_users_b], [total_users_a, total_users_b])
print(f"Z-test for conversion rates: p-value = {p_value:.4f}")

# 5. Revenue and LTV Analysis
print("\n5. Revenue and LTV Analysis")
print("---------------------------")
def calculate_ltv(group):
    revenue = group[group['event_name'] == 'subscribe']['revenue'].sum() - \
              group[group['event_name'] == 'refund']['revenue'].sum()
    return pd.Series({'ltv': revenue})

user_ltv = df.groupby(['user_id', 'experiment_variant']).apply(calculate_ltv).reset_index()

ltv_by_variant = user_ltv.groupby('experiment_variant')['ltv'].agg(['mean', 'median', 'count'])
print("LTV by Variant:")
print(ltv_by_variant)

plt.figure(figsize=(10, 6))
sns.boxplot(x='experiment_variant', y='ltv', data=user_ltv)
plt.title('Distribution of LTV by Variant')
plt.xlabel('Variant')
plt.ylabel('LTV (USD)')
plt.show()

# Mann-Whitney U test for LTV
variant_a = user_ltv[user_ltv['experiment_variant'] == 'A']['ltv']
variant_b = user_ltv[user_ltv['experiment_variant'] == 'B']['ltv']
statistic, p_value = stats.mannwhitneyu(variant_a, variant_b, alternative='two-sided')

print(f"Mann-Whitney U test for LTV: p-value = {p_value:.4f}")

# 6. Subscription Duration Analysis
print("\n6. Subscription Duration Analysis")
print("----------------------------------")
subscription_durations = df[df['event_name'] == 'subscribe']['productDuration'].value_counts()
print("Distribution of subscription durations:")
print(subscription_durations)

plt.figure(figsize=(10, 6))
sns.countplot(x='productDuration', hue='experiment_variant', data=df[df['event_name'] == 'subscribe'])
plt.title('Distribution of Subscription Durations by Variant')
plt.xlabel('Duration')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()

# Chi-square test for subscription duration distribution
duration_counts = pd.crosstab(df[df['event_name'] == 'subscribe']['productDuration'], 
                              df[df['event_name'] == 'subscribe']['experiment_variant'])
chi2, p_value, dof, expected = stats.chi2_contingency(duration_counts)
print(f"Chi-square test for subscription duration distribution: p-value = {p_value:.4f}")

# 7. Churn and Retention Analysis
print("\n7. Churn and Retention Analysis")
print("--------------------------------")
def get_churn_rate(df, variant):
    subscribed_users = df[(df['experiment_variant'] == variant) & (df['event_name'] == 'subscribe')]['user_id'].nunique()
    churned_users = df[(df['experiment_variant'] == variant) & (df['event_name'] == 'auto_renew_off')]['user_id'].nunique()
    return churned_users / subscribed_users

churn_rate_a = get_churn_rate(df, 'A')
churn_rate_b = get_churn_rate(df, 'B')

print(f"Churn rate for Variant A: {churn_rate_a:.2%}")
print(f"Churn rate for Variant B: {churn_rate_b:.2%}")

# Z-test for churn rates
subscribed_users_a = df[(df['experiment_variant'] == 'A') & (df['event_name'] == 'subscribe')]['user_id'].nunique()
subscribed_users_b = df[(df['experiment_variant'] == 'B') & (df['event_name'] == 'subscribe')]['user_id'].nunique()
churned_users_a = df[(df['experiment_variant'] == 'A') & (df['event_name'] == 'auto_renew_off')]['user_id'].nunique()
churned_users_b = df[(df['experiment_variant'] == 'B') & (df['event_name'] == 'auto_renew_off')]['user_id'].nunique()

z_stat, p_value = stats.proportions_ztest([churned_users_a, churned_users_b], [subscribed_users_a, subscribed_users_b])
print(f"Z-test for churn rates: p-value = {p_value:.4f}")

# 8. Refund Analysis
print("\n8. Refund Analysis")
print("------------------")
def get_refund_rate(df, variant):
    subscribed_users = df[(df['experiment_variant'] == variant) & (df['event_name'] == 'subscribe')]['user_id'].nunique()
    refunded_users = df[(df['experiment_variant'] == variant) & (df['event_name'] == 'refund')]['user_id'].nunique()
    return refunded_users / subscribed_users

refund_rate_a = get_refund_rate(df, 'A')
refund_rate_b = get_refund_rate(df, 'B')

print(f"Refund rate for Variant A: {refund_rate_a:.2%}")
print(f"Refund rate for Variant B: {refund_rate_b:.2%}")

# Z-test for refund rates
refunded_users_a = df[(df['experiment_variant'] == 'A') & (df['event_name'] == 'refund')]['user_id'].nunique()
refunded_users_b = df[(df['experiment_variant'] == 'B') & (df['event_name'] == 'refund')]['user_id'].nunique()

z_stat, p_value = stats.proportions_ztest([refunded_users_a, refunded_users_b], [subscribed_users_a, subscribed_users_b])
print(f"Z-test for refund rates: p-value = {p_value:.4f}")

# 9. Time-based Analysis
print("\n9. Time-based Analysis")
print("----------------------")
df_time = df.set_index('event_time').resample('D').agg({
    'revenue': 'sum',
    'user_id': 'nunique'
}).reset_index()

df_time['cumulative_revenue'] = df_time.groupby('experiment_variant')['revenue'].cumsum()
df_time['cumulative_users'] = df_time.groupby('experiment_variant')['user_id'].cumsum()

plt.figure(figsize=(12, 6))
sns.lineplot(x='event_time', y='cumulative_revenue', hue='experiment_variant', data=df_time)
plt.title('Cumulative Revenue Over Time')
plt.xlabel('Date')
plt.ylabel('Cumulative Revenue (USD)')
plt.show()

# 10. Cohort Analysis
print("\n10. Cohort Analysis")
print("-------------------")
df['cohort'] = df['first_event_time'].dt.to_period('W')
df['week'] = (df['event_time'].dt.to_period('W') - df['cohort']).apply(lambda r: r.n)

cohort_data = df.groupby(['cohort', 'week', 'experiment_variant'])['user_id'].nunique().unstack(level='experiment_variant')
retention_data = cohort_data.div(cohort_data.iloc[:, 0], axis=0)

plt.figure(figsize=(12, 8))
sns.heatmap(retention_data, annot=True, fmt='.2%', cmap='YlGnBu')
plt.title('Cohort Retention Heatmap')
plt.xlabel('Week')
plt.ylabel('Cohort')
plt.show()

# 11. LTV Projection
print("\n11. LTV Projection")
print("------------------")
def project_ltv(df, days):
    df_proj = df[df['days_since_first_event'] <= days].copy()
    user_ltv = df_proj.groupby(['user_id', 'experiment_variant']).apply(calculate_ltv).reset_index()
    return user_ltv.groupby('experiment_variant')['ltv'].mean()

ltv_30 = project_ltv(df, 30)
ltv_90 = project_ltv(df, 90)
ltv_180 = project_ltv(df, 180)
ltv_365 = project_ltv(df, 365)

print("Projected LTV by variant:")
print(pd.DataFrame({
    '30 days': ltv_30,
    '90 days': ltv_90,
    '180 days': ltv_180,
    '365 days': ltv_365
}))

# 12. Conclusion and Recommendations
print("\n12. Conclusion and Recommendations")
print("-----------------------------------")
winning_variant = 'B' if ltv_by_variant.loc['B', 'mean'] > ltv_by_variant.loc['A', 'mean'] else 'A'

print(f"Based on our comprehensive analysis, Variant {winning_variant} appears to be the winning variant in terms of overall Lifetime Value (LTV).")

print("\nKey Findings:")
print(f"1. LTV: Variant {winning_variant} shows a higher average LTV.")
print(f"2. Conversion Rate: Variant {'B' if conv_rate_b > conv_rate_a else 'A'} has a higher conversion rate.")
print(f"3. Churn Rate: Variant {'B' if churn_rate_b < churn_rate_a else 'A'} demonstrates a lower churn rate.")
print(f"4. Refund Rate: Variant {'B' if refund_rate_b < refund_rate_a else 'A'} has a lower refund rate.")

if p_value < 0.05:
    print(f"\nThe difference in LTV between variants is statistically significant (p-value: {p_value:.4f}).")
else:
    print(f"\nThe difference in LTV between variants is not statistically significant (p-value: {p_value:.4f}).")

print("\nRecommendations:")
if p_value < 0.05:
    print(f"1. Implement Variant {winning_variant}: Given the statistically significant results, we recommend rolling out Variant {winning_variant} to all users.")
    print(f"2. Further Optimization: Conduct follow-up experiments to fine-tune Variant {winning_variant} and potentially improve its performance even more.")
else:
    print("1. Extended Testing: While there are observable differences, they are not statistically significant. Consider extending the experiment duration or increasing the sample size to achieve statistical significance.")
    print("2. Segment Analysis: Conduct a deeper analysis of user segments to identify any subgroups where one variant clearly outperforms the other.")

print("\n3. User Experience Analysis: Investigate the factors contributing to the differences in conversion, churn, and refund rates between variants. This may involve user surveys or session recording analysis.")
print("4. LTV Projection Refinement: Continuously refine the LTV projection model as more long-term data becomes available. This will improve future decision-making and experiment evaluation.")
print("5. Cohort Analysis Follow-up: Pay close attention to the performance of recent cohorts. If newer cohorts show stronger preference for one variant, it may indicate shifting user preferences or market conditions.")
print("6. Subscription Duration Strategy: Based on the distribution of subscription durations, consider adjusting the offering or pricing strategy to encourage longer subscription periods in the winning variant.")

print("\nNext Steps:")
print("1. Present findings to key stakeholders and decide on the implementation strategy.")
print("2. Develop a roadmap for future experiments based on the insights gained from this test.")
print("3. Set up a system for continuous monitoring of key metrics (LTV, conversion rate, churn rate) post-implementation.")
print("4. Conduct a retrospective on the A/B testing process to identify areas for improvement in future experiments.")

# Visualization of Key Metrics
metrics = pd.DataFrame({
    'Metric': ['LTV', 'Conversion Rate', 'Churn Rate', 'Refund Rate'],
    'Variant A': [ltv_by_variant.loc['A', 'mean'], conv_rate_a, churn_rate_a, refund_rate_a],
    'Variant B': [ltv_by_variant.loc['B', 'mean'], conv_rate_b, churn_rate_b, refund_rate_b]
})

plt.figure(figsize=(12, 6))
metrics.set_index('Metric').plot(kind='bar')
plt.title('Comparison of Key Metrics Between Variants')
plt.xlabel('Metric')
plt.ylabel('Value')
plt.legend(title='Variant')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Final LTV Projection Visualization
projection_data = pd.DataFrame({
    'Days': [30, 90, 180, 365],
    'Variant A': [ltv_30['A'], ltv_90['A'], ltv_180['A'], ltv_365['A']],
    'Variant B': [ltv_30['B'], ltv_90['B'], ltv_180['B'], ltv_365['B']]
})

plt.figure(figsize=(12, 6))
sns.lineplot(x='Days', y='value', hue='variable', data=pd.melt(projection_data, ['Days']))
plt.title('LTV Projection Over Time')
plt.xlabel('Days')
plt.ylabel('Projected LTV (USD)')
plt.legend(title='Variant')
plt.show()

print("\nThis concludes our comprehensive A/B test analysis. The visualizations above summarize our key findings and projections.")

# Additional analysis: Effect size calculation
ltv_mean_diff = ltv_by_variant.loc['B', 'mean'] - ltv_by_variant.loc['A', 'mean']
ltv_pooled_std = np.sqrt((ltv_by_variant.loc['A', 'count'] * variant_a.std()**2 + 
                          ltv_by_variant.loc['B', 'count'] * variant_b.std()**2) / 
                         (ltv_by_variant.loc['A', 'count'] + ltv_by_variant.loc['B', 'count'] - 2))
effect_size = ltv_mean_diff / ltv_pooled_std

print(f"\nEffect Size (Cohen's d) for LTV difference: {effect_size:.4f}")

if abs(effect_size) < 0.2:
    print("This indicates a small effect size.")
elif abs(effect_size) < 0.5:
    print("This indicates a medium effect size.")
else:
    print("This indicates a large effect size.")

print("\nFinal Thoughts:")
print("1. Statistical vs. Practical Significance: While we've focused on statistical significance, it's crucial to consider the practical significance of the observed differences. Even if not statistically significant, the differences in key metrics may still be practically important for the business.")
print("2. Long-term Impact: The LTV projections suggest that the differences between variants may compound over time. Continue monitoring long-term trends to validate these projections.")
print("3. Balancing Metrics: While LTV is our primary metric, it's important to consider the balance between all metrics (conversion rate, churn rate, refund rate) when making the final decision.")
print("4. Iterative Improvement: Regardless of which variant is chosen, there's always room for improvement. Use the insights from this test to inform future iterations and experiments.")
print("5. Customer Satisfaction: Don't forget to consider qualitative feedback and customer satisfaction metrics alongside these quantitative results.")

print("\nBy considering all these factors, you'll be well-equipped to make an informed decision about your monetization strategy and set the stage for continued optimization and growth.")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
1. Data Overview and Cleaning
-----------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23772 entries, 0 to 23771
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   experiment_variant      23772 non-null  object        
 1   event_name              23772 non-null  object        
 2   user_id                 23772 non-null  object        
 3   first_event_time        2

TypeError: unhashable type: 'list'