# Prior values creator

### Imports

In [1]:
from datetime import datetime
from ml_lib.feature_store import configure_offline_feature_store
from ml_lib.feature_store.offline.client import FeatureStoreOfflineClient
import numpy as np


### Function definitions

In [2]:
def compute_IG_params_from_mean_and_variance(mean=2, variance=1):
    """Return ``(a, b)`` matched to the given mean and variance.

    The formulas are the moment-matching solution for an inverse-gamma
    distribution with shape ``a`` and scale ``b``, whose moments are
    ``mean = b / (a - 1)`` and ``variance = mean**2 / (a - 2)``.

    Args:
        mean: Target mean of the distribution.
        variance: Target variance; used as a divisor, so it must be non-zero.

    Returns:
        Tuple ``(a, b)``.
    """
    # Solving variance = mean**2 / (a - 2) for a.
    shape = mean**2 / variance + 2
    # Solving mean = b / (a - 1) for b.
    scale = mean * (shape - 1)
    return shape, scale

In [9]:
def get_prior_information(personalized, winsorized, project_id, n_days_spend, datapoint_type, company_id, num_of_past_dates_to_use, start_date_of_test):
    """Query historical spend via Athena and print prior hyper-parameters.

    Builds one of two SQL queries against
    ``analytics__{company_id}__{project_id}``, runs it through
    ``FeatureStoreOfflineClient.run_athena_query_pandas`` and prints the run
    configuration together with the derived prior values. Nothing is returned.

    Args:
        personalized: Chooses the ``fl_personalized_offer_spend`` value whose
            rows are left out of the spend sum (0 when truthy, 9 otherwise).
            Has no effect for the two projects that sum the spend column
            unconditionally.
        winsorized: Truthy selects the ``wins_spend`` column, otherwise
            ``spend``.
        project_id: Project part of the schema name; also selects the spend
            expression and the source table.
        n_days_spend: Only used when ``datapoint_type`` is not
            ``"one_datapoint_per_user_per_meta_date"``: spend is summed over
            the ``n_days_spend`` days starting at each user's ``first_login``.
        datapoint_type: ``"one_datapoint_per_user_per_meta_date"`` yields one
            datapoint per (user, meta_date, first_login); any other value
            yields one datapoint per (user, first_login).
        company_id: Company part of the schema name.
        num_of_past_dates_to_use: Length, in days, of the look-back window
            that ends the day before ``start_date_of_test``.
        start_date_of_test: Date text placed directly after the SQL ``DATE``
            keyword, so it must already carry its own single quotes
            (e.g. ``"'2023-07-11'"``).

    NOTE(review): all arguments are interpolated into the SQL text as-is, so
    they must come from a trusted source.
    """
    # Offer-spend flag value that is filtered out of the conditional sum.
    excluded_offer_flag: int = 0 if personalized else 9
    spend_column: str = "wins_spend" if winsorized else "spend"

    configure_offline_feature_store(workgroup="development", catalog_name="production")

    # These two projects sum the spend column without the offer-flag filter.
    if project_id in ("spongebob_x7d9q", "terragenesis_m89uz"):
        spending_line = f", SUM({spend_column}) as total_spend"
    else:
        spending_line = f", COALESCE(SUM(CASE WHEN fl_personalized_offer_spend <> {excluded_offer_flag} THEN {spend_column} END), 0) total_spend"

    # An n-day window starting on first_login ends n - 1 days later.
    spend_offset = str(n_days_spend - 1)

    table_name = (
        'user_level_performance_after_1_6_2022'
        if project_id == "idle_mafia_ecbqb"
        else 'user_level_performance'
    )

    lookback_days = str(num_of_past_dates_to_use)

    if datapoint_type == "one_datapoint_per_user_per_meta_date":
        print('one_datapoint_per_user_per_meta_date')
        # One row per (user, meta_date, first_login) inside the look-back window.
        data_query = f"""
                WITH
                    base_table AS (
                        SELECT user_id
                            , meta_date
                            , first_login                                                                          
                            {spending_line}
                        FROM analytics__{company_id}__{project_id}.{table_name}
                        WHERE meta_date  BETWEEN  DATE {start_date_of_test} - INTERVAL '{lookback_days}' DAY AND  DATE {start_date_of_test} - INTERVAL '1' DAY
                        GROUP BY user_id
                            , meta_date
                            , first_login
                        )

                SELECT COUNT(*)                                              AS totals
                , SUM(CASE WHEN total_spend > 0 THEN 1 ELSE 0 END) AS positives
                , SUM(total_spend) AS sum_values
                , SUM(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS sum_logs
                , AVG(CASE WHEN total_spend > 0 THEN 1 ELSE 0 END) AS conv_rate
                , AVG(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS mean_of_logs_spend
                , VARIANCE(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS var_of_logs_spend
                FROM base_table;"""
    else:
        # One row per (user, first_login): spend over the first n days after
        # first login. The first_login window is shifted back by spend_offset
        # days relative to the per-meta-date window above.
        data_query = f"""
                WITH daily_user_spend AS (
                    SELECT user_id
                        , meta_date
                        , first_login
                        {spending_line}
                    FROM analytics__{company_id}__{project_id}.{table_name}
                    WHERE first_login BETWEEN DATE {start_date_of_test} - INTERVAL '{lookback_days}' DAY - INTERVAL '{spend_offset}' DAY AND  DATE {start_date_of_test} - INTERVAL '1' DAY - INTERVAL '{spend_offset}' DAY
                    GROUP BY user_id
                        , meta_date
                        , first_login)

                , daily_user_spend_only_first_n_days AS (
                    SELECT *
                    FROM daily_user_spend
                    WHERE meta_date >= first_login
                    AND meta_date <= first_login + INTERVAL '{spend_offset}' DAY
                    )

                , base_table AS (
                    SELECT user_id, first_login as meta_date, SUM(total_spend) AS total_spend
                    FROM daily_user_spend_only_first_n_days
                    GROUP BY user_id, first_login
                    )
                
                
                SELECT COUNT(*)                                        AS totals
                , SUM(CASE WHEN total_spend > 0 THEN 1 ELSE 0 END) AS positives
                , SUM(total_spend) AS sum_values
                , AVG(CASE WHEN total_spend > 0 THEN 1 ELSE 0 END) AS conv_rate
                , SUM(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS sum_logs
                , AVG(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS mean_of_logs_spend
                , VARIANCE(LN(CASE WHEN total_spend > 0 THEN total_spend END)) AS var_of_logs_spend
                FROM base_table;"""

    stats_df = FeatureStoreOfflineClient.run_athena_query_pandas(data_query)

    # The observed variance of log-spend is used as the *mean* of the IG prior,
    # with the prior's own variance fixed at 1.
    a_prior_ig, b_prior_ig = compute_IG_params_from_mean_and_variance(
        mean=stats_df['var_of_logs_spend'][0], variance=1
    )

    report_rows = (
        ("Company id:", company_id),
        ("Project id:", project_id),
        ("Datapoint type:", datapoint_type),
        ("N days spend:", n_days_spend),
        ("Winsorized:", winsorized),
        ("Personalized:", personalized),
        ("Start of the test:", start_date_of_test),
        ("a_prior_beta:", 1),
        ("b_prior_beta:", 1),
        ("a_prior_ig:", a_prior_ig),
        ("b_prior_ig:", b_prior_ig),
    )
    for label, value in report_rows:
        print(label, value)
    print("m_prior:", stats_df['mean_of_logs_spend'].values[0])
    print("w_prior:", 10)

### Compute priors

In [None]:
# Available [company_id, project_id] pairs:
# ["century_games_ncmgu", "idle_mafia_ecbqb"]
# ["tilting_point_mjs4k", "terragenesis_m89uz"]
# ["tinysoft_a9kwp", "heroes_magic_war_h2sln"]
# ["phoenix_games_cd8wx", "knighthood_v2_ghbch"]
# ["webelinx_ewo7l", "roomsexits_xk87l"]

In [11]:
# Target schema: analytics__<company_id>__<project_id>.
company_id, project_id = "phoenix_games_cd8wx", "knighthood_v2_ghbch"

# Spend-selection switches passed to get_prior_information.
personalized = True
winsorized = True

datapoint_type = "one_datapoint_per_user_per_meta_date"
n_days_spend = 7

# The date is formatted WITH surrounding single quotes because it is inserted
# verbatim after the SQL DATE keyword.
start_date_of_test = datetime(2023, 7, 11).strftime("'%Y-%m-%d'")
num_of_past_dates_to_use = 90

In [12]:
# Run the query and print the priors for the configuration set above.
get_prior_information(
    personalized=personalized,
    winsorized=winsorized,
    project_id=project_id,
    n_days_spend=n_days_spend,
    datapoint_type=datapoint_type,
    company_id=company_id,
    num_of_past_dates_to_use=num_of_past_dates_to_use,
    start_date_of_test=start_date_of_test,
)

one_datapoint_per_user_per_meta_date


DummyStatsClient._send unknown.athena_query_runs_total:1|c


Company id: phoenix_games_cd8wx
Project id: knighthood_v2_ghbch
Datapoint type: one_datapoint_per_user_per_meta_date
N days spend: 7
Winsorized: True
Personalized: True
Start of the test: '2023-07-11'
a_prior_beta: 1
b_prior_beta: 1
a_prior_ig: 2.2379636893086023
b_prior_ig: 0.6038975300166833
m_prior: 1.5901426468880684
w_prior: 10


DummyStatsClient._send unknown.athena_query_queue_ms:647.000000|ms
DummyStatsClient._send unknown.athena_query_execution_ms:2696.000000|ms
DummyStatsClient._send unknown.athena_query_scanned_bytes:1839833|c
