# King Experiment Test Analysis

In [None]:
# Shared ``figsize`` value (width, height) passed to ``plt.figure`` by the
# trend and assignment plots in this notebook.
FIG_SIZE = (10, 5)

In [None]:

# from src.Config import Config
import matplotlib.pyplot as plt
import seaborn as sns




def plot_daily_trend(column_name, daily_metrics, title):
    """Draw a per-group line chart of one daily metric.

    Args:
        column_name: Column of ``daily_metrics`` plotted on the y-axis.
        daily_metrics: DataFrame whose index supplies the x-axis values and
            which holds ``column_name`` plus the ``Config.VARIATION`` column
            used to colour the lines.
        title: Text shown as the figure title.

    A dashed green vertical line and a rotated "TREATMENT" label are drawn at
    ``Config.EXPERIMENT_START_DATE``. The function does not call
    ``plt.show()``; callers do that themselves.
    """
    tick_style = {"fontsize": 15, "fontfamily": "serif"}
    treatment_start = Config.EXPERIMENT_START_DATE

    plt.figure(figsize=FIG_SIZE)
    axes = sns.lineplot(
        data=daily_metrics,
        x=daily_metrics.index,
        y=column_name,
        hue=Config.VARIATION,
    )
    plt.legend(title="Group", title_fontsize="25", fontsize="20")
    plt.xticks(**tick_style)
    plt.yticks(**tick_style)
    plt.xlabel("")
    plt.ylabel("")

    # The marker line spans the full height of the axes (ymin=0 .. ymax=1).
    axes.axvline(x=treatment_start, ymin=0, ymax=1, linestyle="dashed", color="g")
    # The label's y position is the smallest value of the plotted column.
    lowest_value = daily_metrics[column_name].min()
    plt.text(treatment_start, lowest_value, "TREATMENT", rotation=90)

    plt.xticks(rotation=30)
    plt.title(title, fontsize=40)

In [None]:
def get_groups_daily_assignment(assignment_df):
    """Summarise daily and cumulative assignment counts for groups A and B.

    Args:
        assignment_df: DataFrame with at least the columns
            ``assignment_date`` and ``abtest_group``; every row counts as one
            assignment.

    Returns:
        DataFrame with one row per assignment date (in groupby sort order)
        and the columns ``assignment_date``, ``group_A_assignment``,
        ``group_B_assignment``, ``population_size`` (assignments that day),
        ``ratio_A``, ``ratio_B`` (each group's share of that day's
        assignments), ``group_A_size`` and ``group_B_size`` (running totals).

    Raises:
        ValueError: From pandas, when the observed group labels are not a
            subset of ``{"A", "B"}`` and their number is not exactly two, so
            the fixed output column names cannot be applied.
    """
    daily_counts = (
        assignment_df.groupby(["assignment_date", "abtest_group"])
        .size()
        .unstack(fill_value=0)
    )

    # A group with no rows in the input produces no column after ``unstack``,
    # which used to break the fixed six-name rename below. Pad the missing
    # A/B column with zeros; any other label set is left untouched so inputs
    # that worked before keep their positional mapping.
    if set(daily_counts.columns) <= {"A", "B"}:
        daily_counts = daily_counts.reindex(columns=["A", "B"], fill_value=0)

    group_labels = list(daily_counts.columns)
    daily_counts["total"] = daily_counts[group_labels].sum(axis=1)
    for label in group_labels:
        daily_counts[f"ratio_{label}"] = daily_counts[label] / daily_counts["total"]

    result = daily_counts.reset_index()
    result.columns = [
        "assignment_date",
        "group_A_assignment",
        "group_B_assignment",
        "population_size",
        "ratio_A",
        "ratio_B",
    ]
    # Running totals: size of each group up to and including the given date.
    result["group_A_size"] = result["group_A_assignment"].cumsum()
    result["group_B_size"] = result["group_B_assignment"].cumsum()
    return result

In [None]:
import matplotlib.pyplot as plt
import numpy as np





def plot_stats(aggregated_data, stats, title):
    """Show group A vs. group B averages as paired horizontal bars.

    Args:
        aggregated_data: DataFrame with one column per metric and rows
            labelled ``"A"`` and ``"B"`` holding the values to plot.
        stats: Mapping from metric name to the p-value printed beside the
            bars of that metric.
        title: Suffix appended to the chart title.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    metric_names = list(aggregated_data.columns)
    group_a_means = aggregated_data.loc["A"].values
    group_b_means = aggregated_data.loc["B"].values
    p_values = [stats[name] for name in metric_names]

    bar_thickness = 0.35
    half_thickness = bar_thickness / 2

    fig, ax = plt.subplots(figsize=(10, 6))
    positions = np.arange(len(metric_names))
    # The two bars of a metric sit on either side of its tick position.
    group_a_bars = ax.barh(
        positions - half_thickness, group_a_means, bar_thickness, label="Group A"
    )
    group_b_bars = ax.barh(
        positions + half_thickness, group_b_means, bar_thickness, label="Group B"
    )

    ax.set_xlabel("Average Value")
    ax.set_ylabel("Metrics")
    ax.set_title(f"Average Metrics Comparison _ {title}")
    ax.set_yticks(positions)
    ax.set_yticklabels(metric_names)
    ax.invert_yaxis()
    ax.legend()

    # The same p-value is written next to both bars of a metric, just past
    # the end of each bar and vertically centred on it.
    for p_value, bar_pair in zip(p_values, zip(group_a_bars, group_b_bars)):
        for bar in bar_pair:
            ax.text(
                bar.get_width() + 0.5,
                bar.get_y() + bar.get_height() / 2,
                f"p={p_value:.3f}",
                va="center",
                ha="left",
                fontsize=10,
            )

    plt.tight_layout()
    plt.show()

## Load Data

In [None]:
from google.cloud import bigquery
from src.Config import Config
from src.data.big_query_loader import BigQueryLoader

# Load the three data sets used by the rest of the notebook through
# BigQueryLoader. NOTE(review): ``parquet_file`` presumably names a local
# cache of the query result — confirm against BigQueryLoader.load_data.
assignment_activity = BigQueryLoader.load_data(
    query_file="assignment_activity.sql", parquet_file="assignment_activity"
)
historical_activity = BigQueryLoader.load_data(
    query_file="historical_activity.sql", parquet_file="historical_activity"
)

# The daily-metrics query takes the study start date as a STRING parameter and
# is loaded without a ``parquet_file`` argument.
parameters = {"study_start_date": ("STRING", Config.STUDY_START_DATE)}
daily_metrics = BigQueryLoader.load_data(
    query_file="daily_metrics.sql", query_parameters_map=parameters
)

## Experiment Population Analysis

In [None]:
# Rows with a non-null assignment date per group, and each group's share of
# all rows in ``assignment_activity``.
total_exp_population = len(assignment_activity)
variation_size = assignment_activity.groupby("abtest_group")["assignment_date"].count()
group_A_size, group_B_size = (int(variation_size[label]) for label in ("A", "B"))
group_A_ratio = group_A_size / total_exp_population
group_B_ratio = group_B_size / total_exp_population

print(group_A_size, group_B_size, total_exp_population)
print(f"GROUP B RATIO: {group_B_ratio}", f"GROUP A RATIO: {group_A_ratio}")

In [None]:
# Per-date assignment counts, ratios and running group sizes for A and B.
group_assignment = get_groups_daily_assignment(assignment_activity)

In [None]:
import matplotlib.pyplot as plt

# Pie chart of the overall A/B split; percentages are drawn outside the pie
# (pctdistance > 1) and the group labels inside it (labeldistance < 1).
fig, ax = plt.subplots()
plott = ax.pie(
    [group_A_size, group_B_size],
    labels=["Control(A)", "Variation(B)"],
    autopct="%1.1f%%",
    # colors=["saddlebrown", "gray"],
    pctdistance=1.2,
    labeldistance=0.3,
    # radius=0.5
)

In [None]:
# Share of each day's assignments that went to group A and to group B.
plt.figure(figsize=FIG_SIZE)
ax = sns.lineplot(
    data=group_assignment.set_index("assignment_date").loc[:, ["ratio_A", "ratio_B"]]
)
a = plt.xticks(rotation=30)  # assigned only to keep the return value out of the cell output
plt.show()

# Running size of each group over the assignment dates.
plt.figure(figsize=FIG_SIZE)
ax = sns.lineplot(
    data=group_assignment.set_index("assignment_date").loc[
        :, ["group_A_size", "group_B_size"]
    ]
)
a = plt.xticks(rotation=30)
plt.show()

## Experiment Daily Analysis

In [None]:
import pandas as pd

# Attach the running group sizes to every daily-metrics row by matching the
# activity date to the assignment date.
daily_metrics = pd.merge(
    daily_metrics,
    group_assignment[["assignment_date", "group_B_size", "group_A_size"]],
    left_on="activity_date",
    right_on="assignment_date",
    how="left",
)
# The group sizes are running totals, so a day without any new assignment
# keeps the size reached on the previous day instead of dropping to 0 (which
# would give zero denominators in the per-player metrics). Only days before
# the first assignment fall back to 0.
daily_metrics = daily_metrics.sort_values("activity_date", kind="stable")
daily_metrics[["group_A_size", "group_B_size"]] = (
    daily_metrics[["group_A_size", "group_B_size"]].ffill().fillna(0)
)

# Explicit copy: later cells add columns to the treatment-period frame, which
# must not be a view of ``daily_metrics``.
treatment_daily_metrics = daily_metrics[
    daily_metrics["activity_date"] >= Config.EXPERIMENT_START_DATE
].copy()
treatment_daily_metrics.set_index("activity_date", inplace=True)
daily_metrics.set_index("activity_date", inplace=True)

### Active Player

In [None]:
# Daily active players divided by the running size of the row's own group.
treatment_daily_metrics["daily_active_player_percentage"] = (
    treatment_daily_metrics.apply(
        lambda row: row.daily_active_player
        / (row.group_B_size if row.abtest_group == "B" else row.group_A_size),
        axis=1,
    )
)

plot_daily_trend(
    column_name="daily_active_player_percentage",
    daily_metrics=treatment_daily_metrics,
    title="Daily Active Player Percentage",
)
plt.show()

### Conversion Rate

In [None]:
# Purchases per member of the row's own group (running group size).
treatment_daily_metrics["total_conversion_rate"] = treatment_daily_metrics.apply(
    lambda row: row.total_num_purhcases
    / (row.group_B_size if row.abtest_group == "B" else row.group_A_size),
    axis=1,
)
# Purchases per player who was active that day.
treatment_daily_metrics["active_player_conversion_rate"] = (
    treatment_daily_metrics["total_num_purhcases"]
    / treatment_daily_metrics["daily_active_player"]
)

plot_daily_trend(
    column_name="total_conversion_rate",
    daily_metrics=treatment_daily_metrics,
    title="Total Conversion Rate",
)
plt.show()

plot_daily_trend(
    column_name="active_player_conversion_rate",
    daily_metrics=treatment_daily_metrics,
    title="Active Player Conversion Rate",
)
plt.show()

### Revenue

In [None]:



    plot_daily_trend(
        "total_sum_purchase_per_player",
        treatment_daily_metrics,
        title="Total Sum of Purchases per Player",
    )
    plt.show()

    plot_daily_trend(
        "total_sum_purchase_per_active_player",
        treatment_daily_metrics,
        title="Total Sum of Purchases per Active Player",
    )
    plt.show()

    plot_daily_trend(
        "average_purchase_size", treatment_daily_metrics, title="Average Purchase Size"
    )
    plt.show()

### Game Rounds

In [None]:
# Game rounds per member of the row's own group (running group size).
treatment_daily_metrics["total_game_rounds_per_player"] = treatment_daily_metrics.apply(
    lambda row: row.total_game_rounds
    / (row.group_B_size if row.abtest_group == "B" else row.group_A_size),
    axis=1,
)

# These two ratios are added to the full ``daily_metrics`` frame, so their
# plots also cover the days before the experiment start.
daily_metrics["game_rounds_per_active_player"] = (
    daily_metrics["total_game_rounds"] / daily_metrics["daily_active_player"]
)
daily_metrics["motivated_players_ratio"] = (
    daily_metrics["total_motivated_players"] / daily_metrics["daily_active_player"]
)


plot_daily_trend(
    column_name="total_game_rounds_per_player",
    daily_metrics=treatment_daily_metrics,
    title="Total Game Rounds per Player",
)
plt.show()


plot_daily_trend(
    column_name="game_rounds_per_active_player",
    daily_metrics=daily_metrics,
    title="Game Rounds per Active Player",
)
plt.show()

plot_daily_trend(
    column_name="motivated_players_ratio",
    daily_metrics=daily_metrics,
    title="Motivated Players ratio",
)
plt.show()

## Statistical Analysis

### Feature Extraction

In [None]:
import pandas as pd
from datetime import datetime

# Parse the date columns and derive per-player ages in whole days.
assignment_activity["assignment_date"] = pd.to_datetime(
    assignment_activity.assignment_date
)
assignment_activity["install_date"] = pd.to_datetime(assignment_activity.install_date)
# Days between install and assignment to the experiment.
assignment_activity["age_in_app"] = (
    assignment_activity["assignment_date"] - assignment_activity["install_date"]
).dt.days

assignment_activity["conversion_date"] = pd.to_datetime(
    assignment_activity.conversion_date
)
# Days between install and conversion; missing when there is no conversion date.
assignment_activity["age_convert"] = (
    assignment_activity["conversion_date"] - assignment_activity["install_date"]
).dt.days
assignment_activity.set_index("playerid", inplace=True)

last_experiment_date = datetime.strptime("22-05-2017", "%d-%m-%Y")

# Days from assignment to the last experiment date, counting both end days.
# Computed on the whole column at once, like the age features above, instead
# of calling a Python lambda for every row.
assignment_activity["test_duration"] = (
    last_experiment_date - assignment_activity["assignment_date"]
).dt.days + 1

# Per-player rates: each total is normalised either by the days spent in the
# experiment ("per_experiment_day") or by ``activity_count`` ("per_active_day").
assignment_activity["activity_count_per_day"] = (
    assignment_activity["activity_count"] / assignment_activity["test_duration"]
)

assignment_activity["converted_per_experiment_day"] = (
    assignment_activity["converted_days_count"] / assignment_activity["test_duration"]
)

assignment_activity["converted_per_active_day"] = (
    assignment_activity["converted_days_count"] / assignment_activity["activity_count"]
)

assignment_activity["purchase_per_experiment_day"] = (
    assignment_activity["purchase_sum"] / assignment_activity["test_duration"]
)

assignment_activity["purchase_per_active_day"] = (
    assignment_activity["purchase_sum"] / assignment_activity["activity_count"]
)
assignment_activity["gameends_per_experiment_day"] = (
    assignment_activity["gameends_sum"] / assignment_activity["test_duration"]
)

assignment_activity["motivation_per_experiment_day"] = (
    assignment_activity["motivated_days_count"] / assignment_activity["test_duration"]
)

assignment_activity["motivation_per_active_day"] = (
    assignment_activity["motivated_days_count"] / assignment_activity["activity_count"]
)

In [None]:
# Per-group aggregates and per-metric test results for all players.
# NOTE(review): ``calc_stats`` is not defined or imported in the visible part
# of this notebook — confirm where it comes from.
aggregated, stat_result = calc_stats(assignment_activity)

In [None]:
# Bar chart of the A/B averages with p-values for the whole population.
plot_stats(aggregated, stat_result, "All Players")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


# for i, col in enumerate(
#     assignment_activity.select_dtypes(["int64", "float64"]).columns
# ):
#     plt.figure(figsize=(16, 10))
#     # plt.subplot(3, 3, i + 1)
#     sns.violinplot(data=assignment_activity, x=col, hue=Config.VARIATION)
#     plt.legend(title='Group', title_fontsize='25', fontsize='20')
#     plt.xticks(fontsize=15, fontfamily='serif')
#     plt.yticks(fontsize=15, fontfamily='serif')
#     plt.xlabel('')
#     plt.ylabel('')

#     plt.title(col, fontsize=40)
#     plt.tight_layout()

#     plt.show()

### Validation Analysis

#### No Customers in Common Between Variations

In [None]:
# assignment_activity.groupby('playerid').filter(lambda g: len(g)>1)

#### Player in App Age

In [None]:
# Compare ``age_in_app`` between the groups and show the group means.
# NOTE(review): ``perform_test`` is not defined or imported in the visible
# part of this notebook — confirm where it comes from.
result = perform_test(assignment_activity, Config.VARIATION, "age_in_app", "ttest")
print(f"P-Value : {float(result.pvalue)}")
assignment_activity.groupby("abtest_group")["age_in_app"].mean()

#### Player Conversion State

In [None]:
# Work on an independent copy: later cells add columns to ``conversion_data``,
# which must not be a slice of ``assignment_activity``.
conversion_data = assignment_activity[
    ["age_convert", "age_in_app", "abtest_group"]
].copy()

# Share of players in each group without a conversion age.
conversion_data.groupby("abtest_group")["age_convert"].apply(
    lambda x: x.isnull().sum() / len(x)
)


##### Non Converted

In [None]:
from scipy.stats import chi2_contingency

# Chi-square test on the group x "has no conversion age" contingency table.
contingency_table = pd.crosstab(
    conversion_data["abtest_group"], conversion_data["age_convert"].isnull()
)
chi2, p, dof, expected = chi2_contingency(contingency_table)
print("Chi-square statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies:", expected)

##### Before Treatment Converted

In [None]:
# Flag players with a conversion age that is smaller than their age at
# assignment, then test whether the flag is distributed differently by group.
conversion_data["before_assignment_conversion"] = (
    conversion_data["age_convert"] < conversion_data["age_in_app"]
) & conversion_data["age_convert"].notnull()


contingency_table = pd.crosstab(
    index=conversion_data["abtest_group"],
    columns=conversion_data["before_assignment_conversion"],
)
chi2, p, dof, expected = chi2_contingency(contingency_table)
print("Chi-square statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies:", expected)

##### After Treatment Converted

In [None]:
# Flag players with a conversion age at or above their age at assignment,
# then test whether the flag is distributed differently by group.
conversion_data["after_assignment_conversion"] = (
    conversion_data["age_convert"] >= conversion_data["age_in_app"]
) & conversion_data["age_convert"].notnull()

contingency_table = pd.crosstab(
    index=conversion_data["abtest_group"],
    columns=conversion_data["after_assignment_conversion"],
)
chi2, p, dof, expected = chi2_contingency(contingency_table)
print("Chi-square statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies:", expected)

####  Players Profitability

In [None]:
# Missing historical purchase averages are treated as 0.
historical_activity["purchase_avg"] = historical_activity["hist_purchase_avg"].fillna(0)

result = perform_test(historical_activity, "hist_abtest_group", "purchase_avg", "ttest")
print(f"P-Value : {float(result.pvalue)}")
# Show the group means of the metric that was just tested (the cell used to
# display the ``age_in_app`` means copied from the in-app-age check).
historical_activity.groupby("hist_abtest_group")["purchase_avg"].mean()

## Segment Analysis

### High Engaged Players

In [None]:
# Segment: players whose ``gameends_avg`` exceeds 100.
# (The overall ``gameends_avg`` mean was computed here and discarded; that
# no-op statement has been removed.)
high_engaged_players = assignment_activity[
    (assignment_activity["gameends_avg"] > 100)
]
print(high_engaged_players.age_convert.mean())
high_engaged_players_aggregated, high_engaged_players_stat_result = calc_stats(high_engaged_players)

In [None]:
# Display the aggregates computed for the high-engagement segment.
high_engaged_players_aggregated

### Old Converted Players

In [None]:
# Segment: players more than 10 days in the app at assignment who have a
# conversion age smaller than their age at assignment.
old_converted_players = assignment_activity.loc[
    (assignment_activity["age_in_app"] > 10)
    & (assignment_activity["age_in_app"] > assignment_activity["age_convert"])
    & assignment_activity["age_convert"].notnull()
]

old_converted_aggregated, old_converted_stat_result = calc_stats(old_converted_players)

### New Players

In [None]:
# Segment: players assigned less than 2 days after install.
new_players = assignment_activity[
    (assignment_activity["age_in_app"] < 2)]
new_aggregated, new_stat_result = calc_stats(new_players)

### New Converted Players

In [None]:
# Segment: players assigned less than 2 days after install who have a
# conversion age strictly greater than their age at assignment.
new_converted_players = assignment_activity.loc[
    (assignment_activity["age_in_app"] < 2)
    & (assignment_activity["age_in_app"] < assignment_activity["age_convert"])
    & assignment_activity["age_convert"].notnull()
]
new_converted_aggregated, new_converted_stat_result = calc_stats(new_converted_players)

### Non Converted Players

In [None]:
# Segment: players without a conversion age.
non_converted_players = assignment_activity[assignment_activity["age_convert"].isnull()]

non_converted_aggregated, non_converted_stat_result = calc_stats(non_converted_players)

### Active Players

In [None]:
# Segment: players whose index value also appears in the index of the
# historical rows with ``hist_activity_count`` above 10.
# NOTE(review): ``assignment_activity`` is indexed by ``playerid`` (set in the
# feature-extraction cell); this match only works if ``historical_activity``
# is indexed by the same player id — no ``set_index`` for it is visible here,
# confirm.
active_players = assignment_activity[
    assignment_activity.index.isin(
        historical_activity[historical_activity.hist_activity_count > 10].index
    )
]
active_aggregated, active_stat_result = calc_stats(active_players)

### Profitable Players

In [None]:
# Segment: players whose index value also appears in the index of the
# historical rows with a positive ``purchase_avg`` (a column created in the
# "Players Profitability" cell, which has to run first).
# NOTE(review): relies on ``historical_activity`` sharing the ``playerid``
# index of ``assignment_activity`` — confirm.
profitable_players = assignment_activity[
    assignment_activity.index.isin(
        historical_activity[historical_activity.purchase_avg > 0].index
    )
]
profitable_aggregated, profitable_stat_result = calc_stats(profitable_players)

### All Plots

In [None]:
# A/B averages and p-values for the high-engagement segment.
plot_stats(high_engaged_players_aggregated, high_engaged_players_stat_result, 'High Engaged')

In [None]:
# A/B averages and p-values for the profitable-players segment.
plot_stats(profitable_aggregated, profitable_stat_result, "Profitable Players")

In [None]:
# A/B averages and p-values for the active-players segment.
plot_stats(active_aggregated, active_stat_result, "Active Players")

In [None]:
# A/B averages and p-values for the old-converted-players segment.
plot_stats(old_converted_aggregated, old_converted_stat_result, "Old Converted Players")

In [None]:
# A/B averages and p-values for the new-players segment.
plot_stats(new_aggregated, new_stat_result, "New Players")

In [None]:
# A/B averages and p-values for the new-converted-players segment.
plot_stats(new_converted_aggregated, new_converted_stat_result, "New Converted Players")

In [None]:
# A/B averages and p-values for the non-converted-players segment.
plot_stats(non_converted_aggregated, non_converted_stat_result, "None Converted")

## END of File

Metrics:

1. DAU/MAU: The ratio of daily active users (DAU) to monthly active users (MAU) measures the percentage of monthly active users who engage with the game on a daily basis, providing insights into the game’s stickiness and popularity over time. A high DAU/MAU ratio indicates strong user retention and frequent engagement, while a low ratio may signal potential churn or disinterest among users.

2. Retention: Retention tracks the number of players who continue to engage with the game over time, typically measured at various intervals such as day 1, day 7, and day 30. High retention rates mean players find your game compelling and will likely return. On the other hand, low retention rates suggest issues with gameplay, onboarding, or overall user experience.

3. Churn rate: Churn rate is the share of players who stop engaging with the game over a specific period, i.e. the players who churn or disengage. A high churn rate suggests issues with player retention and overall game satisfaction, while a low churn rate indicates strong player loyalty and satisfaction.

4. Session count: Session count tracks the number of gaming sessions initiated by players within a defined timeframe, providing insights into player engagement frequency and habits. Higher session counts indicate strong player engagement and interest in the game, while lower session counts may signal waning interest or competing priorities. Understanding session count trends can help developers identify opportunities to enhance return visits, introduce new content, or enhance social features to keep players returning for more.

5. Player count: Player count refers to the total number of players engaging with the game within a specified timeframe, providing insights into overall player interest and adoption.

6. New users: New users represent players who have recently installed or started playing the game for the first time within a defined timeframe. Monitoring new user acquisition rates gives you insights into the game’s appeal to new audiences, the effectiveness of your marketing efforts, and overall user acquisition trends.

7. Conversion rate: Conversion rate measures the percentage of players who take a desired action, such as making in-game purchases, subscribing to a premium service, or completing a specific level or task. A high conversion rate indicates effective monetization strategies and players’ willingness to engage in these revenue-generating activities.

8. ARPU (Average Revenue Per User): ARPU calculates the average revenue generated per user, providing insights into the overall monetization effectiveness of a game. By dividing total revenue by the total number of active users within a specified period, you can calculate the average value of each player and identify opportunities to increase revenue through targeted monetization efforts.

10. LTV (Lifetime Value): LTV estimates the total revenue generated by a player over their entire engagement with the game, including initial and recurring purchases. By forecasting the long-term value of players, you can prioritize user acquisition efforts, optimize retention strategies, and calculate the return on investment for marketing campaigns and user acquisition initiatives.

11. IAP revenue (In-App Purchase Revenue): IAP revenue represents the total revenue generated from in-game purchases, including consumable items, virtual currency, expansion packs, and other digital goods.

12. ILRD (IAP Lifetime Revenue per Download): ILRD measures the lifetime revenue generated per download from in-app purchases, indicating the revenue potential of each acquired user. This metric allows you to understand the average revenue generated per user acquisition and evaluate the effectiveness of monetization strategies in converting downloads into revenue.



Metrics:


2. Retention: Retention tracks the number of players who continue to engage with the game over time, typically measured at various intervals such as day 1, day 7, and day 30. High retention rates mean players find your game compelling and will likely return. On the other hand, low retention rates suggest issues with gameplay, onboarding, or overall user experience.

3. Churn rate: Churn rate is the share of players who stop engaging with the game over a specific period, i.e. the players who churn or disengage. A high churn rate suggests issues with player retention and overall game satisfaction, while a low churn rate indicates strong player loyalty and satisfaction.

6. New users: New users represent players who have recently installed or started playing the game for the first time within a defined timeframe. Monitoring new user acquisition rates gives you insights into the game’s appeal to new audiences, the effectiveness of your marketing efforts, and overall user acquisition trends.

10. LTV (Lifetime Value): LTV estimates the total revenue generated by a player over their entire engagement with the game, including initial and recurring purchases. By forecasting the long-term value of players, you can prioritize user acquisition efforts, optimize retention strategies, and calculate the return on investment for marketing campaigns and user acquisition initiatives.

11. IAP revenue (In-App Purchase Revenue): IAP revenue represents the total revenue generated from in-game purchases, including consumable items, virtual currency, expansion packs, and other digital goods.

12. ILRD (IAP Lifetime Revenue per Download): ILRD measures the lifetime revenue generated per download from in-app purchases, indicating the revenue potential of each acquired user. This metric allows you to understand the average revenue generated per user acquisition and evaluate the effectiveness of monetization strategies in converting downloads into revenue.


1. Active users: The number of unique users active (at least once) in the experiment window and in the pre-experiment window.
2. first-month activation rate: The proportion of new registrants from the last 28 days who have been active at least once.
3. month-to-month retention rate: The proportion of active users in the last 28 days among those who were active in the preceding 28-day period (i.e., between 55 and 28 days ago).
4. month-to-month reactivation rate: The proportion of active users in the last 28 days among those who were not active in the preceding 28-day period.




In [None]:
# Per-group count of players whose conversion age is at or above their age at
# assignment, with each group's share of that count.
# The boolean mask is built from ``assignment_activity`` but applied to
# ``conversion_data``, so it relies on both frames sharing the same index.
after_assignment_conversion = (
    conversion_data[
        assignment_activity["age_convert"] >= assignment_activity["age_in_app"]
    ]
    .groupby("abtest_group")["age_convert"]
    .count()
    .reset_index()
)
# ``total`` is the count summed over all groups; ``ratio`` is each group's share.
after_assignment_conversion["total"] = after_assignment_conversion["age_convert"].sum()
after_assignment_conversion["ratio"] = (
    after_assignment_conversion["age_convert"] / after_assignment_conversion["total"]
)
after_assignment_conversion

In [None]:
# Per-group count of players whose conversion age is below their age at
# assignment, with each group's share of that count.
# The boolean mask is built from ``assignment_activity`` but applied to
# ``conversion_data``, so it relies on both frames sharing the same index.
before_assignment_conversion = (
    conversion_data[
        assignment_activity["age_convert"] < assignment_activity["age_in_app"]
    ]
    .groupby("abtest_group")["age_convert"]
    .count()
    .reset_index()
)
# ``total`` is the count summed over all groups; ``ratio`` is each group's share.
before_assignment_conversion["total"] = before_assignment_conversion[
    "age_convert"
].sum()
before_assignment_conversion["ratio"] = (
    before_assignment_conversion["age_convert"] / before_assignment_conversion["total"]
)
before_assignment_conversion

In [None]:
# Display the historical-activity frame for inspection.
historical_activity