In [None]:
from src.stats_utils import fit_retention_curve, retention_formula, calculate_daily_active_users, calculate_total_revenue, percentage_difference
from src.visual_utils import show_retention_curve

TASK 1:

In [None]:
# Install volume per day; the same figure is used for both variants.
installs_per_day = 20_000

# Observed retention samples for each variant.
# Presumably retention_X[i] is the retention measured on days[i] -- verify
# against fit_retention_curve.
days = [1, 3, 7, 14]
retention_A = [0.53, 0.27, 0.17, 0.06]
retention_B = [0.48, 0.25, 0.19, 0.09]

# Monetisation inputs, listed as (variant A, variant B).
daily_purchase_ratio_A, daily_purchase_ratio_B = 0.0305, 0.0315
ecpm_A, ecpm_B = 9.8, 10.8
# Ad impression / DAU.
# NOTE(review): these two values are not passed to any revenue call visible in
# this notebook -- confirm whether ad revenue should be scaled by them.
ad_over_dau_A, ad_over_dau_B = 2.3, 1.6

In [None]:
# Fit the retention model once per variant (A first, then B) from the
# observed (days, retention) samples.
param_A, param_B = [
    fit_retention_curve(days, observed)
    for observed in (retention_A, retention_B)
]

print(f"Fitted parameters for Variation A are: {param_A}\n")
print(f"Fitted parameters for Variation B are: {param_B}\n")

# Show each variant's observed retention together with its fitted parameters.
# Kept as two separate statements so the cell's trailing expression is unchanged.
show_retention_curve(days, retention_A, param_A)
show_retention_curve(days, retention_B, param_B)


Part A)

Daily active users after 15 days:<br>
Variant A: 50011<br>
Variant B: 54454<br>
Percentage difference: 8.88%

Variant B has more DAU.

In [None]:

# Daily active users on day 15 for each variant, using the shared install
# volume and each variant's fitted retention parameters.
dau_day = 15
DAU_A, DAU_B = [
    calculate_daily_active_users(dau_day, installs_per_day, fitted)
    for fitted in (param_A, param_B)
]

print(f"DAU A: {DAU_A}\nDAU B: {DAU_B}")
print(f"Percentage difference: {percentage_difference(DAU_A, DAU_B):.2f}%")


Part B)

Total revenue for variants by day 15:<br>
Variant A: 21054.2111<br>
Variant B: 22383.045<br>
Percentage difference: 6.31%<br>

Variant B earned more.

------------------------

Assuming the average money spent by each player per day is the same for both variants (or negligibly different).

AMSPP = average money spent by each player per day<br>
DPR = daily purchase ratio = total purchases / DAU<br>
eCPM = ad revenue per 1000 impressions<br>

------------------------

Total revenue = sum(Total revenue per day)<br>
Total revenue per day = ad revenue per day + purchase revenue per day<br>
Purchase revenue per day = DPR * DAU * AMSPP<br>
Ad revenue per day = DAU * eCPM / 1000<br>

In [None]:
# Total revenue over the first 15 days for each variant, from the variant's
# daily purchase ratio, eCPM and fitted retention parameters.
# NOTE(review): ad_over_dau_A / ad_over_dau_B are not passed here -- confirm
# that calculate_total_revenue accounts for ad impressions per DAU.
# NOTE(review): confirm whether the end day (15) is included in the sum; the
# sale scenario later in this notebook treats the end day as excluded.
total_revenue_A = calculate_total_revenue(1, 15, daily_purchase_ratio_A, ecpm_A, installs_per_day, param_A)
total_revenue_B = calculate_total_revenue(1, 15, daily_purchase_ratio_B, ecpm_B, installs_per_day, param_B)

# Revenues are rounded to two decimals, matching the 30-day, sale and
# new-source cells, so the figures are directly comparable across scenarios.
print(f"For 15 days:\nTotal revenue A: {total_revenue_A:.2f}\nTotal revenue B: {total_revenue_B:.2f}\n")
print(f"Percentage difference: {percentage_difference(total_revenue_A, total_revenue_B):.2f}%")

Part C)

Total revenue for variants by day 30:<br>
Variant A: 52128.74<br>
Variant B: 58998.43<br>
Percentage difference: 13.18%<br>

Variant B's total revenue is higher, and the gap is considerably larger than for the 15-day revenue (13.18% vs. 6.31%).

In [None]:
# Total revenue for each variant with the end day set to 30; identical to the
# 15-day calculation apart from the end day.
horizon_day = 30
total_revenue_A, total_revenue_B = [
    calculate_total_revenue(1, horizon_day, purchase_ratio, ecpm, installs_per_day, fitted)
    for purchase_ratio, ecpm, fitted in (
        (daily_purchase_ratio_A, ecpm_A, param_A),
        (daily_purchase_ratio_B, ecpm_B, param_B),
    )
]
print(f"For 30 days:\nTotal revenue A: {total_revenue_A:.2f}\nTotal revenue B: {total_revenue_B:.2f}\n")
print(f"Percentage difference: {percentage_difference(total_revenue_A, total_revenue_B):.2f}%")


Part D)

For 30 days, with 10-day sale starting 15th:<br>
Total revenue A: 57243.03<br>
Total revenue B: 64697.38<br>
Percentage difference: 13.02%<br>

Both revenues increased with the sale. However, the relative difference is nearly unchanged (13.02% with the sale vs. 13.18% without it).<br>
Variant B is still better.<br>

--------------

A 10-day sale starts on the 15th day.<br>
Assuming retention and installs are unaffected.<br>
Assuming the start day itself (the 15th in this case) is also affected by the sale.<br>

What is total revenue by day 30? Which variant is better?

The only change is to the purchase rate.<br>
Day 1 -> 15 (excluding): ordinary DPR<br>
Day 15 -> 25 (excluding): DPR + 0.01 (one percentage point higher)<br>
Day 25 -> 30 (excluding): ordinary DPR<br>
The sum of these gives the total revenue.

In [None]:
# Revenue is computed in three consecutive segments per variant: before the
# sale, during the sale (daily purchase ratio raised by an absolute 0.01), and
# after the sale. The segment revenues are then added up.
# NOTE(review): adjacent segments share their boundary day (15 and 25). If
# calculate_total_revenue includes its end day, those days are counted twice
# -- confirm the end day is exclusive.
sale_start, sale_end, last_day = 15, 25, 30
sale_dpr_boost = 0.01

# Variant A
revenue_1_A = calculate_total_revenue(
    1, sale_start, daily_purchase_ratio_A, ecpm_A, installs_per_day, param_A
)
revenue_2_A = calculate_total_revenue(
    sale_start, sale_end, daily_purchase_ratio_A + sale_dpr_boost, ecpm_A, installs_per_day, param_A
)
revenue_3_A = calculate_total_revenue(
    sale_end, last_day, daily_purchase_ratio_A, ecpm_A, installs_per_day, param_A
)
total_revenue_A = revenue_1_A + revenue_2_A + revenue_3_A

# Variant B
revenue_1_B = calculate_total_revenue(
    1, sale_start, daily_purchase_ratio_B, ecpm_B, installs_per_day, param_B
)
revenue_2_B = calculate_total_revenue(
    sale_start, sale_end, daily_purchase_ratio_B + sale_dpr_boost, ecpm_B, installs_per_day, param_B
)
revenue_3_B = calculate_total_revenue(
    sale_end, last_day, daily_purchase_ratio_B, ecpm_B, installs_per_day, param_B
)
total_revenue_B = revenue_1_B + revenue_2_B + revenue_3_B

print(f"For 30 days, with 10-day sale starting 15th:\nTotal revenue A: {total_revenue_A:.2f}\nTotal revenue B: {total_revenue_B:.2f}\n")
print(f"Percentage difference: {percentage_difference(total_revenue_A, total_revenue_B):.2f}%")


Part E)

For 30 days, with new source introduced on 20th:<br>
Total revenue A: 59392.37<br>
Total revenue B: 65906.49<br>

Percentage difference: 10.97%<br>

These are the highest revenues the variants have reached so far, and Variant A seems to benefit more from this update.
Still, although the percentage difference has decreased, B is still the better choice.

-------------

On day 20, new rules apply (including 20th day). 

Updated retention parameters for:<br>
Variant A: a = 0.58, b = 0.12<br>
Variant B: a = 0.52, b = 0.10<br>

Updated installs_per_day values for:<br>
Old users: 12,000<br>
New users: 8,000<br>

The revenue calculation is unchanged for the days before day 20.<br>
From day 20 to 30:
    12,000 installs with the old retention parameters
    8,000 installs with the new retention parameters

In [None]:
# Retention parameters given for users arriving from the new source.
new_param_A, new_param_B = [0.58, 0.12], [0.52, 0.10]

# Daily installs from day 20 onwards, split by source.
old_installs_per_day, new_installs_per_day = 12_000, 8_000  # old source / new source

# Revenue is split into: everything before the switch (original install
# volume and parameters), then from the switch onwards the old-source installs
# with the original parameters plus the new-source installs with the new ones.
# NOTE(review): how users installed before day 20 (at 20,000/day) contribute
# to the "old source" segment depends on calculate_total_revenue -- confirm
# they are not dropped or rescaled to 12,000/day.
source_switch_day, final_day = 20, 30

# Variant A
revenue_20_A = calculate_total_revenue(
    1, source_switch_day, daily_purchase_ratio_A, ecpm_A, installs_per_day, param_A
)
revenue_old_A = calculate_total_revenue(
    source_switch_day, final_day, daily_purchase_ratio_A, ecpm_A, old_installs_per_day, param_A
)
revenue_new_A = calculate_total_revenue(
    source_switch_day, final_day, daily_purchase_ratio_A, ecpm_A, new_installs_per_day, new_param_A
)
total_revenue_A = revenue_20_A + revenue_old_A + revenue_new_A

# Variant B
revenue_20_B = calculate_total_revenue(
    1, source_switch_day, daily_purchase_ratio_B, ecpm_B, installs_per_day, param_B
)
revenue_old_B = calculate_total_revenue(
    source_switch_day, final_day, daily_purchase_ratio_B, ecpm_B, old_installs_per_day, param_B
)
revenue_new_B = calculate_total_revenue(
    source_switch_day, final_day, daily_purchase_ratio_B, ecpm_B, new_installs_per_day, new_param_B
)
total_revenue_B = revenue_20_B + revenue_old_B + revenue_new_B

print(f"For 30 days, with new source introduced on 20th:\nTotal revenue A: {total_revenue_A:.2f}\nTotal revenue B: {total_revenue_B:.2f}\n")
print(f"Percentage difference: {percentage_difference(total_revenue_A, total_revenue_B):.2f}%")


Part F)

Assuming there aren't any significant differences in other factors such as cost, resource usage, etc.,
just from the revenue perspective, adding a new source is a better choice than the 10-day sale (it yields higher total revenue for both variants).