In [14]:
import yt.wrapper as yt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, ttest_ind
import os

from tqdm.auto import tqdm


%matplotlib inline

In [15]:
yt.config["proxy"]["url"] = 'hahn.yt.yandex.net'
client = yt.YtClient(proxy='hahn', token=os.environ.get('YT_TOKEN'))

In [29]:
def calc_metric_avg(data, part_coef=0.8, test_name="test", portion=None, buckets=100, alpha=0.0, norm_column="probability"):
    
    flag = (data["test_name"] == test_name) & (data["stupid_cnt"] == 0) 
    
    if portion is not None:
        flag = flag & (data["portion"] == portion)
    
    nes_part = data[flag]
    
#     frequency_norm = nes_part[norm_column].values / nes_part[norm_column].values.sum()
    
#     norm = 1 * (1 - alpha) + alpha * frequency_norm * len(nes_part)
    
    raw_resulrs = (nes_part["part_cnt"] * part_coef + nes_part["good_cnt"]) / (nes_part["part_cnt"] + nes_part["good_cnt"] + nes_part["bad_cnt"])
        
    bucket = nes_part["uuid"].apply(lambda x: hash(x) % buckets)
    
    pd_results = pd.DataFrame({
        "metric": raw_resulrs.values,
        "bucket": bucket.values
    })
    
    final_results = pd_results.groupby(by=["bucket"]).mean()["metric"].values
    
    return final_results

def calc_metric_energy(data, part_coef=0.8, test_name="test", portion=None, buckets=100, alpha=0.0, norm_column="probability"):
    
    flag = (data["test_name"] == test_name) & (data["stupid_cnt"] == 0) 
    
    if portion is not None:
        flag = flag & (data["portion"] == portion)
    
    nes_part = data[flag]
    
#     frequency_norm = nes_part[norm_column].values / nes_part[norm_column].values.sum()
    
#     norm = 1 * (1 - alpha) + alpha * frequency_norm * len(nes_part)
    
    raw_resulrs = (nes_part["part_cnt"] * part_coef + nes_part["good_cnt"])
    
    bucket = nes_part["uuid"].apply(lambda x: hash(x) % buckets)
    
    pd_results = pd.DataFrame({
        "metric": raw_resulrs.values,
        "bucket": bucket.values
    })
    
    final_results = pd_results.groupby(by=["bucket"]).mean()["metric"].values
    
    return final_results

def calc_metric_raw_avg(data, part_coef=0.8, test_name="test", portion=None, buckets=100, alpha=0.0, norm_column="probability"):
    
    flag = (data["test_name"] == test_name)
    
    if portion is not None:
        flag = flag & (data["portion"] == portion)
    
    nes_part = data[flag]
    
    bucket = nes_part["uuid"].apply(lambda x: hash(x) % buckets).values
    
    final_buckets = np.concatenate(
        (
            np.repeat(bucket, nes_part["part_cnt"].values), 
            np.repeat(bucket, nes_part["good_cnt"].values), 
            np.repeat(bucket, nes_part["bad_cnt"].values)
        )
    )
    
    raw_resulrs = np.concatenate(
        (
            np.ones(nes_part["part_cnt"].values.sum()) * part_coef, 
            np.ones(nes_part["good_cnt"].values.sum()), 
            np.zeros(nes_part["bad_cnt"].values.sum())
        )
    )
    
    
    pd_results = pd.DataFrame({
        "metric": raw_resulrs,
        "bucket": final_buckets
    })
    
    final_results = pd_results.groupby(by=["bucket"]).mean()["metric"].values
    
    return final_results



def process_exp(table_name, 
                portion=None, 
                calc_metric=calc_metric_avg, 
                alpha=0.0, 
                plot=True, 
                norm_column="frequency", 
                beta=1000.0,
                test_name="test",
                cntrl_name="cntrl",
                probability_threshold=0.
            ):
    read_result = pd.DataFrame(client.read_table(table_name, raw=False))
    read_result["is_part"] = 1 * (read_result["result"] == "part")
    read_result["is_stupid"] = 1 * (read_result["result"] == "stupid")
    read_result["is_good"] = 1 * (read_result["result"] == "good")
    read_result["is_bad"] = 1 * (read_result["result"] == "bad")
    
    if probability_threshold > 0:
        read_result = read_result[read_result["probability"] >= probability_threshold].reset_index(drop=True)
    
    
    parsed_data = read_result.groupby(by=["session_id_old"]).agg(
                                                    part_cnt = pd.NamedAgg(column = 'is_part', aggfunc = 'sum'),
                                                    stupid_cnt = pd.NamedAgg(column = 'is_stupid', aggfunc = 'sum'),
                                                    good_cnt = pd.NamedAgg(column = 'is_good', aggfunc = 'sum'),
                                                    bad_cnt = pd.NamedAgg(column = 'is_bad', aggfunc = 'sum'),
                                                    test_name = pd.NamedAgg(column = 'test_name', aggfunc = 'last'),
                                                    uuid = pd.NamedAgg(column = 'uuid', aggfunc = 'last'),
                                                    portion = pd.NamedAgg(column = 'portion', aggfunc = 'last'),
                                                    frequency = pd.NamedAgg(column = 'frequency', aggfunc = 'mean'),
                                                    session_len = pd.NamedAgg(column = 'session_len', aggfunc = 'last'),
                                                    ).reset_index()
#     frequency = parsed_data.groupby(by=["test_name", "session_len", "portion"]).agg(
#         frequency = pd.NamedAgg(column = 'frequency', aggfunc = 'last'),
#     ).reset_index()

#     frequency_norm = frequency.groupby(by=["test_name", "portion"])["frequency"].sum().reset_index()

#     frequency = frequency.merge(frequency_norm, on=["test_name", "portion"], suffixes=("", "_norm"))
#     frequency["probability"] = frequency["frequency"]/frequency["frequency_norm"]
#     frequency.drop(columns=["frequency", "frequency_norm"], inplace=True)

#     parsed_data = parsed_data.merge(frequency, on=["test_name", "session_len", "portion"])

#     sessions_length = parsed_data.groupby(by=["test_name", "portion"])["session_len"].sum().reset_index()
#     parsed_data = parsed_data.merge(sessions_length, on=["test_name", "portion"],suffixes=("", "_norm"))
#     parsed_data["normalized_len"] = parsed_data["session_len"]/parsed_data["session_len_norm"]
    
#     prob = np.random.uniform(size=len(parsed_data))
    
#     parsed_data = parsed_data[prob < beta * parsed_data["probability"]].reset_index(drop=True)
    
    
    
    test_raw = calc_metric(parsed_data, part_coef=0.5, test_name=test_name, portion=portion, alpha=alpha, norm_column=norm_column)
    ctrl_raw = calc_metric(parsed_data, part_coef=0.5, test_name=cntrl_name, portion=portion, alpha=alpha, norm_column=norm_column)
    
    
    pvalue=mannwhitneyu(ctrl_raw, test_raw).pvalue
    effect = (test_raw.mean() - ctrl_raw.mean())/ctrl_raw.mean()
    message = f'pvalue={pvalue:.3f}, effect={effect:.3f}, ctrl metric = {ctrl_raw.mean():.3f}, test_metric = {test_raw.mean():.3f}, cnt = {len(ctrl_raw) + len(test_raw)}'
    #, pvalue, effect, ctrl_metric.mean(), test_metric.mean()
    if plot:
        print(message)
        print()
    
    res_dict={"p_value": pvalue, "cntr_value": ctrl_raw.mean(), "test_value": test_raw.mean()}
    
    coefs = np.linspace(0, 1.0, 50)

    
    if plot:
        p_values = []

        for coef in coefs:
            test_raw = calc_metric(parsed_data, part_coef=coef, test_name=test_name, portion=portion)
            ctrl_raw = calc_metric(parsed_data, part_coef=coef, test_name=cntrl_name, portion=portion)
            pvalue=mannwhitneyu(ctrl_raw, test_raw).pvalue
            p_values.append(pvalue)

        plt.plot(coefs, p_values)
        plt.show()
    
    return res_dict


def make_report_table_for_exp(table, test_name="test",
                cntrl_name="ctrl", probability_threshold=0.0):
    
    res_metric_avg = process_exp(table,
                                portion=None, 
                                plot=False,
                                calc_metric=calc_metric_avg,
                                test_name=test_name,
                                cntrl_name=cntrl_name,
                                probability_threshold=probability_threshold
                                 )
    
    
    res_metric_raw_avg = process_exp(table,
                                      portion=None, 
                                      plot=False,
                                      calc_metric=calc_metric_raw_avg,
                                      test_name=test_name,
                                      cntrl_name=cntrl_name,
                                      probability_threshold=probability_threshold
                                     )
    
    
    
    res_metric_energy = process_exp(table,
                                 portion=None, 
                                 plot=False,
                                 calc_metric=calc_metric_energy,
                                 test_name=test_name,
                                 cntrl_name=cntrl_name,
                                 probability_threshold=probability_threshold
                                                                )
    
    results = pd.DataFrame([
        {
            "metric": "Усреднеенная метрика по сессиям",
            "cntrl_value": res_metric_avg["cntr_value"],
            "test_value": res_metric_avg["test_value"],
            "diff": res_metric_avg["test_value"] - res_metric_avg["cntr_value"],
            "p_value": res_metric_avg["p_value"],
        
        },
        
        
        {
            "metric": "Усреднеенная метрика по микро сессиям",
            "cntrl_value": res_metric_raw_avg["cntr_value"],
            "test_value": res_metric_raw_avg["test_value"],
            "diff": res_metric_raw_avg["test_value"] - res_metric_raw_avg["cntr_value"],
            "p_value": res_metric_raw_avg["p_value"],
        
        },
        
        {
            "metric": "Средняя энергия сессий",
            "cntrl_value": res_metric_energy["cntr_value"],
            "test_value": res_metric_energy["test_value"],
            "diff": res_metric_energy["test_value"] - res_metric_energy["cntr_value"],
            "p_value": res_metric_energy["p_value"],
        
        },
        
    ])
    
    return results

# Равномерное распределение длинн сессий

In [30]:
make_report_table_for_exp("//home/voice/ilnur/tasks/EXPERIMENTS-73207/extract_result_2_portions")

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.510772,0.523795,0.013023,0.152189
1,Усреднеенная метрика по микро сессиям,0.502144,0.515998,0.013854,0.088497
2,Средняя энергия сессий,0.726501,0.746549,0.020048,0.157849


# Продовое распределение длинн сессий, сценарий болталки

In [27]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len_gc", test_name="test_378112", cntrl_name="cntrl_378111")

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.51219,0.537315,0.025125,0.101829
1,Усреднеенная метрика по микро сессиям,0.519698,0.52815,0.008452,0.858165
2,Средняя энергия сессий,1.095549,1.075025,-0.020524,0.663499


# Продовое распределение длинн сессий

In [28]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len", test_name="test_378112", cntrl_name="cntrl_378111")

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.570513,0.600962,0.030449,0.874046
1,Усреднеенная метрика по микро сессиям,0.537037,0.598077,0.06104,0.605791
2,Средняя энергия сессий,1.153846,1.269231,0.115385,0.568732


# Фильтрация ответов по согласованности

In [31]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len", 
                          test_name="test_378112", cntrl_name="cntrl_378111",
                         probability_threshold=0.5)

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.622222,0.625,0.002778,0.943111
1,Усреднеенная метрика по микро сессиям,0.637255,0.625,-0.012255,0.862277
2,Средняя энергия сессий,1.1,1.269231,0.169231,0.393472


In [32]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len", 
                          test_name="test_378112", cntrl_name="cntrl_378111",
                         probability_threshold=0.7)

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.75,0.428571,-0.321429,0.070394
1,Усреднеенная метрика по микро сессиям,0.75,0.428571,-0.321429,0.070394
2,Средняя энергия сессий,0.75,0.428571,-0.321429,0.070394


In [33]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len_gc", 
                          test_name="test_378112", cntrl_name="cntrl_378111",
                         probability_threshold=0.5)

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.575938,0.58868,0.012742,0.315829
1,Усреднеенная метрика по микро сессиям,0.585314,0.584349,-0.000965,0.514839
2,Средняя энергия сессий,1.185563,1.127015,-0.058548,0.220682


In [34]:
make_report_table_for_exp("//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/results_with_random_len_gc", 
                          test_name="test_378112", cntrl_name="cntrl_378111",
                         probability_threshold=0.7)

Unnamed: 0,metric,cntrl_value,test_value,diff,p_value
0,Усреднеенная метрика по сессиям,0.617099,0.628315,0.011216,0.814422
1,Усреднеенная метрика по микро сессиям,0.620005,0.617743,-0.002262,0.777543
2,Средняя энергия сессий,0.763327,0.735658,-0.027669,0.504484


# Аггрегация результатов бе MV

In [38]:
GOOD_COEF = 1
PART_COEF = 0.5
BAD_COEF = 0
STUPID_COEF = -1

In [43]:
def calc_metrica_by_session(data, test_name, buckets=100, filter_stupid=True):
    flag = data["test_name"] == test_name
    if filter_stupid:
        flag = flag & (data["stupid_cnt"] == 0)
    nes_data = data[flag]
    bucket = nes_data["uuid"].apply(lambda x: hash(x) % buckets).values
    
    pd_results = pd.DataFrame({
        "metric": nes_data["metric"].values,
        "bucket": bucket
    })
    
    final_results = pd_results.groupby(by=["bucket"]).mean()["metric"].values
    
    return final_results

In [51]:
table_name = "//home/voice/eliseevmax/tasks/EXPERIMENTS-73207/pure_results_with_random_len_prepared"

read_result = pd.DataFrame(client.read_table(table_name, raw=False))
read_result["is_part"] = 1 * (read_result["result"] == "part")
read_result["is_stupid"] = 1 * (read_result["result"] == "stupid")
read_result["is_good"] = 1 * (read_result["result"] == "good")
read_result["is_bad"] = 1 * (read_result["result"] == "bad")


parsed_data = read_result.groupby(by=["session_id"]).agg(
                                                part_cnt = pd.NamedAgg(column = 'is_part', aggfunc = 'sum'),
                                                stupid_cnt = pd.NamedAgg(column = 'is_stupid', aggfunc = 'sum'),
                                                good_cnt = pd.NamedAgg(column = 'is_good', aggfunc = 'sum'),
                                                bad_cnt = pd.NamedAgg(column = 'is_bad', aggfunc = 'sum'),
                                                test_name = pd.NamedAgg(column = 'test_name', aggfunc = 'last'),
                                                uuid = pd.NamedAgg(column = 'uuid', aggfunc = 'last'),
                                                portion = pd.NamedAgg(column = 'portion', aggfunc = 'last'),
                                                frequency = pd.NamedAgg(column = 'frequency', aggfunc = 'mean'),
                                                session_len = pd.NamedAgg(column = 'session_len', aggfunc = 'last'),
                                                session_id_old = pd.NamedAgg(column = 'session_id_old', aggfunc = 'last'),
                                                ).reset_index()
frequency = parsed_data.groupby(by=["test_name", "session_len", "portion"]).agg(
    frequency = pd.NamedAgg(column = 'frequency', aggfunc = 'last'),
).reset_index()



In [47]:
parsed_data["metric"] = (
    GOOD_COEF * parsed_data["good_cnt"] + PART_COEF * parsed_data["part_cnt"] + BAD_COEF * parsed_data["bad_cnt"] + STUPID_COEF * parsed_data["stupid_cnt"]
)/ (
    parsed_data["good_cnt"] + parsed_data["part_cnt"] + parsed_data["bad_cnt"] + parsed_data["stupid_cnt"]
)

In [48]:
test_raw = calc_metrica_by_session(parsed_data, test_name="test_378112")

ctrl_raw = calc_metrica_by_session(parsed_data, test_name="cntrl_378111")

pvalue=mannwhitneyu(ctrl_raw, test_raw).pvalue
effect = (test_raw.mean() - ctrl_raw.mean())/ctrl_raw.mean()
message = f'pvalue={pvalue:.3f}, effect={effect:.3f}, ctrl metric = {ctrl_raw.mean():.3f}, test_metric = {test_raw.mean():.3f}, cnt = {len(ctrl_raw) + len(test_raw)}'
#, pvalue, effect, ctrl_metric.mean(), test_metric.mean()
print(message)

pvalue=0.933, effect=-0.004, ctrl metric = 0.582, test_metric = 0.580, cnt = 200


In [50]:
test_raw = calc_metrica_by_session(parsed_data, test_name="test_378112", filter_stupid=False)

ctrl_raw = calc_metrica_by_session(parsed_data, test_name="cntrl_378111", filter_stupid=False)

pvalue=mannwhitneyu(ctrl_raw, test_raw).pvalue
effect = (test_raw.mean() - ctrl_raw.mean())/ctrl_raw.mean()
message = f'pvalue={pvalue:.3f}, effect={effect:.3f}, ctrl metric = {ctrl_raw.mean():.3f}, test_metric = {test_raw.mean():.3f}, cnt = {len(ctrl_raw) + len(test_raw)}'
#, pvalue, effect, ctrl_metric.mean(), test_metric.mean()
print(message)

pvalue=0.252, effect=0.030, ctrl metric = 0.465, test_metric = 0.479, cnt = 200
