# Импорты

In [28]:
import pandas as pd 
pd.set_option('display.max_columns', None)
import numpy as np
import datetime as dt
from matplotlib import pyplot as plt 
plt.style.use('ggplot')
import seaborn as sns 
sns.set(rc={'figure.figsize':(15, 5)})
import sys
import os
sys.path.append('/home/jovyan') 
from connectors import read_sql_query
from tqdm import tqdm

In [29]:
!pip install tabulate

Looking in indexes: https://nexus.sbmt.io/repository/python/simple


In [30]:
import tabulate

In [31]:
from typing import Tuple 

# Функции

In [32]:
def deltamethod(x: np.array, y: np.array,
                independent: bool = False, bc: bool = False) -> Tuple[float, float]:
    n = len(x)
    mux = np.mean(x)
    muy = np.mean(y)
    
    v11 = np.var(y,ddof=1)
    v22 = np.var(x,ddof=1)
    
    if independent == True:
        v12 = 0
    else: 
        v12 = np.cov(x,y)[0][1]
    
    est = muy / mux
    
    if bc == True:
        est = est + muy/mux**3*v22/n - 1/mux**2*v12/n
    sd = (v11 / mux**2) - (2 * muy / mux**3 * v12) + (muy**2 / mux**4 * v22)
    sd = np.sqrt(sd)
    return est, sd

In [33]:
def drop_outliers(df: pd.DataFrame) -> pd.DataFrame:

    outliers_df = df.groupby("anonymous_id", as_index=False).agg(
        gmv_per_user_tmp=("gmv_per_user", "sum"),
        # gmv_per_user_planned_tmp=("gmv_per_user_planned", "sum"),
        # gmv_per_user_express_tmp=("gmv_per_user_express", "sum"),
        orders_tmp=("orders", "sum"),
        # orders_planned_tmp=("planned_orders", "sum"),
        # orders_express_tmp=("express_orders", "sum")
    )

    outliers_df["avg_check"] = np.where(
        outliers_df["orders_tmp"] != 0, outliers_df["gmv_per_user_tmp"] / outliers_df["orders_tmp"], 
        0
    )
    
    # outliers_df["avg_check_planned"] = np.where(
    #     outliers_df["orders_planned_tmp"] != 0,
    #     outliers_df["gmv_per_user_planned_tmp"] / outliers_df["orders_planned_tmp"],
    #     0,
    # )

    # outliers_df["avg_check_express"] = np.where(
    #     outliers_df["orders_express_tmp"] != 0,
    #     outliers_df["gmv_per_user_express_tmp"] / outliers_df["orders_express_tmp"],
    #     0,
    # )

    gmv = outliers_df[outliers_df["gmv_per_user_tmp"] > 0]
    gmv_mean = gmv["gmv_per_user_tmp"].mean()
    gmv_std = gmv["gmv_per_user_tmp"].std()
    
    # gmv_planned = outliers_df[outliers_df["gmv_per_user_planned_tmp"] > 0]
    # gmv_planned_mean = gmv_planned["gmv_per_user_planned_tmp"].mean()
    # gmv_planned_std = gmv_planned["gmv_per_user_planned_tmp"].std()

    # gmv_express = outliers_df[outliers_df["gmv_per_user_express_tmp"] > 0]
    # gmv_express_mean = gmv_express["gmv_per_user_express_tmp"].mean()
    # gmv_express_std = gmv_express["gmv_per_user_express_tmp"].std()

    orders = outliers_df[outliers_df["orders_tmp"] > 0]
    orders_mean = orders["orders_tmp"].mean()
    orders_std = orders["orders_tmp"].std()
    
    # orders_planned = outliers_df[outliers_df["orders_planned_tmp"] > 0]
    # orders_planned_mean = orders_planned["orders_planned_tmp"].mean()
    # orders_planned_std = orders_planned["orders_planned_tmp"].std()
    
    # orders_express = outliers_df[outliers_df["orders_express_tmp"] > 0]
    # orders_express_mean = orders_express["orders_express_tmp"].mean()
    # orders_express_std = orders_express["orders_express_tmp"].std()

    avg_check = outliers_df[outliers_df["avg_check"] > 0]
    avg_check_mean = avg_check["avg_check"].mean()
    avg_check_std = avg_check["avg_check"].std()

    # avg_check_planned = outliers_df[outliers_df["avg_check_planned"] > 0]
    # avg_check_planned_mean = avg_check_planned["avg_check_planned"].mean()
    # avg_check_planned_std = avg_check_planned["avg_check_planned"].std()

    # avg_check_express = outliers_df[outliers_df["avg_check_express"] > 0]
    # avg_check_express_mean = avg_check_express["avg_check_express"].mean()
    # avg_check_express_std = avg_check_express["avg_check_express"].std()

    try:
        outliers_df = outliers_df[
            (outliers_df["gmv_per_user_tmp"] >= (gmv_mean + (gmv_std * 9))) |
            # (outliers_df["gmv_per_user_planned_tmp"] >= (gmv_planned_mean + (gmv_planned_std * 9))) |
            # (outliers_df["gmv_per_user_express_tmp"] >= (gmv_express_mean + (gmv_express_std * 9))) |
            (outliers_df["orders_tmp"] >= (orders_mean + (orders_std * 9))) |
            # (outliers_df["orders_planned_tmp"] >= (orders_planned_mean + (orders_planned_std * 9))) |
            # (outliers_df["orders_express_tmp"] >= (orders_express_mean + (orders_express_std * 9))) |
            (outliers_df["avg_check"] >= (avg_check_mean + (avg_check_std * 9)))
            # (outliers_df["avg_check_planned"] >= (avg_check_planned_mean + (avg_check_planned_std * 9))) |
            # (outliers_df["avg_check_express"] >= (avg_check_express_mean + (avg_check_express_std * 9)))
        ]
    except:
        outliers_df = outliers_df[outliers_df["anonymous_id"].isnull()]

    df = df[~df["anonymous_id"].isin(outliers_df["anonymous_id"])].reset_index(drop=True)
    return df

In [34]:
import pickle

def dump_data(data_name, file_path):
    """
    Дампинг данных 
    
    :param data_name: файл 
    :param file_path: str, путь до файла 
    """
    with open(file_path, "wb") as f:
        pickle.dump(data_name, f)

def load_data(data_path):
    """
    Загрузка файла 
    
    :param data_path: str, путь до файла 
    """
    with open(data_path, 'rb') as f:
        data_name = pickle.load(f)
    return data_name

# Константы

In [35]:
end_date = dt.date.today()-dt.timedelta(days=7)#лучше делать интервал чтоб долетели данные
platforms = ["ios","android","web"] 
lengths = [2,3,4] #weeks
traffic_proportions = [0.125, 0.25, 0.5, 0.75, 1]
thresholds = [0.05, 0.01]

In [36]:
received_at_window = 3#3 дня долета событийки

In [37]:
dump_files_path = '/home/jovyan/AB-tests/Изменение квантов весовых товаров/'

# Фильтрующее событие

In [38]:
read_sql_query("""
SELECT event,
       uniqExact(anonymous_id) AS n_anons
FROM event.new_app 
 WHERE dwh_dt >= yesterday()-3
AND toDate(ts) = yesterday()-3
and event in ('Product Rendered')
and params['product_sku'] global in (select product_sku from ods.product_hub__product where pack_type = 'Весовой')
group by 1
               """)

Unnamed: 0,event,n_anons
0,Product Rendered,414093


In [39]:
event_filter = f"""
'Product Rendered'
"""

ios_version_filter = f"""
(
  (
    platform = 'ios'
    and (app_version like '7%%' OR app_version like '8%%')
    and (
          (
            toUInt64OrZero(splitByChar('.', app_version)[1]) >= 7 
            and toUInt64OrZero(splitByChar('.', app_version)[2]) >= 16
          )
          or (toUInt64OrZero(app_version) >= 7160000)
          or toUInt64OrZero(splitByChar('.', app_version)[1]) >= 8
        )
  ) 
  OR platform != 'ios'
)
"""

sources_new_app = (
    "Order Completed",
    "Product Added",
    "Login",
    "Shop Selected",
    "Address Change Initiated",
    "Product Viewed",
    "Search Results Viewed",
    "Category Viewed",
    "Shop Selection Started",
    "Pickup Map Opened",
    "Retailer Selection Started",
    "Checkout Sber Spasibo Form Opened",
    "Map Pickup Shop Selected",
    "Checkout Button Clicked",
    "Checkout Loaded",
    "Main Page Viewed",
    "Checkout Delivery Slot Selection Started",
    "Checkout Delivery Slot Selected",
    "Cart Viewed",
    "Search Started",
    "Banner Viewed",
    "Recommendation Block View",
    "Product Category Viewed",
    "Catalogue Tab Clicked",
    "Onboarding Banner Viewed",
    "Order Info Viewed",
    "Product Sort Initiated",
    "Product Filter Initiated",
    "Product Sorted",
    "Address Selected",
    "Address Search Opened",
    "Address Suggest Clicked",
    "Redirected From SBOL",
    "Sberid Authorization Prompt Shown",
    "Map Pickup Button Clicked",
    "Age Confirmation Viewed",
    "Favorites Tab Clicked",
    "Address Change Clicked",
    "Retailer Selected"
)

sources_web = (
    "Order Completed",
    "Checkout Button Clicked",
    "Landing Viewed",
    "Product Added",
    "Main Page Viewed",
    "Suggester Viewed",
    "Main Page Viewed",
    "Shop Selected",
    "Product Viewed",
    "Recommendation Product View",
    "Search Results Viewed",
    "Category Viewed",
    "Shop Selection Started",
    "Pickup Map Opened",
    "Cart Viewed",
    "Order Merged",
    "Checkout Loaded",
    "Pre Replacements Product Selected",
    "Pre Replacements Product All Choosed",
    "Address Change Initiated",
    "Retailer Shipping Method Clicked",
    "Button Find Stores Clicked",
    "Favourites Tab Clicked",
    "Checkout Slot Selection Started",
    "Checkout Delivery Slot Selected",
    "Search Started",
    "Search Completed",
    "Search Suggest Clicked",
    "Suggester Viewed",
    "Banner Viewed",
    "Recommendation Block View",
    "Product Category Viewed",
    "Add To Cart Clicked",
    "Product Sort Initiated",
    "Product Sorted",
    "Cart Params Loaded",
    "Address Suggest Clicked",
    "Alcohol Pickup Button Clicked",
    "Only Pickup Popup Viewed",
    "Retailer Selected"
)

In [40]:
print(event_filter)


'Product Rendered'



# Метрики

## Базовые

In [19]:
def get_general_metrics_within_retlist_new(platforms, start_date, end_date, sources_web, sources_new_app):
    q = f"""
    with 
        toDate('{start_date}') as start_date,
        toDate('{end_date}') as end_date,
        
    filter_events as (
        select
            anonymous_id,
            toDate(min(ts)) as event_dt
        from event.new_app
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and {ios_version_filter}
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id

        UNION ALL 

        select
            toString(anonymous_id) as anonymous_id,
            toDate(min(ts)) as event_dt
        from event.web
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id
    ),

    events as (
        select 
            toString(anonymous_id) as anonymous_id,
            toDate(ts) as dt,
            'web' as platform,
            ifNull(NullIf(tenant, ''), 'sbermarket') as tenant
        from event.web
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and not_bot
            and event in {sources_web}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant

        union all

        select 
            anonymous_id,
            toDate(ts) as dt,
            toString(platform) as platform,
            'sbermarket' as tenant
        from event.new_app
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and event in {sources_new_app}
            and {ios_version_filter}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant
    ),

    

    client_id_x_order_id as (
        select distinct
            date_msk as date,
            uuid,
            anonymous_id,
            if(platform in ('desktop', 'mobile'), 'web', platform) as platform,
            tenant,
            order_id
        from 
            cdm.ab__client_id_x_order_id
        where true 
            and date_msk between start_date and end_date
            and anonymous_id IN (SELECT anonymous_id FROM filter_events)
    ),

    ab_groups_x_events as (
        select distinct
            events.anonymous_id as anonymous_id,
            events.platform as platform,
            events.tenant as tenant,
            client_id_x_order_id.order_id as order_id,
            events.dt as events_dt
        from 
            events

            inner join filter_events 
                on events.anonymous_id = filter_events.anonymous_id

            left join client_id_x_order_id
                on events.anonymous_id = client_id_x_order_id.anonymous_id
                and events.dt = client_id_x_order_id.date
                and events.platform = client_id_x_order_id.platform
                and events.tenant = client_id_x_order_id.tenant

        where true
          and filter_events.event_dt <= events_dt
    ),

    late_cancelled_orders as (
     select 
           stateful_id as order_id,
           minIf(toDate(created_at), next_state = 'complete') as completed_at,
           minIf(toDate(created_at), next_state = 'canceled') as canceled_at,
           if((canceled_at > completed_at + interval 2 day or canceled_at > end_date), 1, 0) as late_cancel_flg
      from (
      select stateful_type,
            created_at,
            next_state,
            stateful_id
        from analytics.int_spree_state_changes  
       where stateful_type = 'Spree::Order'
         and toDate(created_at) between start_date and end_date
         and next_state in ('complete', 'canceled')
        order by created_at) issc
     group by stateful_id
    having completed_at is not null
       and late_cancel_flg
    ),

    financial_measures as (
        select 
            uuid,
        order_number,
        toDate(completed_at) as completed_dt,
        max(shipped_at) as shipped_at,
        sumIf(1, type_store_delivery == 'planned') as planned_store_flg,
        sumIf(1, type_store_delivery == 'on_demand') as express_store_flg,
        sumIf(1, type_delivery=='asap') as express_flg,
        sumIf(1, type_delivery=='planned') as planned_flg,
        sumIf(1, type_delivery=='pickup') as pickup_flg,
        sumIf(gmv_service_fee_net_promo, type_store_delivery == 'on_demand') +
            sumIf(gmv_goods_net_promo, type_store_delivery == 'on_demand') as gmv_net_of_promo_express,
        sumIf(gmv_service_fee_net_promo, type_store_delivery == 'planned') +
            sumIf(gmv_goods_net_promo, type_store_delivery == 'planned') as gmv_net_of_promo_planned,
        sumIf(gmv_service_fee_net_promo, type_delivery = 'pickup') +
            sumIf(gmv_goods_net_promo, type_delivery = 'pickup') as gmv_net_of_promo_pickup,
        sum(gmv_service_fee_net_promo) + sum(gmv_goods_net_promo) as gmv_net_of_promo,
        sumIf(gross_profit, type_delivery = 'asap') as gross_profit_express,
        sumIf(gross_profit, type_delivery = 'planned') as gross_profit_planned,
        sumIf(gross_profit, type_delivery = 'pickup') as gross_profit_pickup,
        sumIf(gross_profit, type_store_delivery = 'on_demand') as gross_profit_express_store,
        sumIf(gross_profit, type_store_delivery = 'planned') as gross_profit_planned_store,
        sum(gross_profit) as gross_profit_full

        from gp_rep.rep__bi_shipment
        where 1=1
            and toDate(completed_at) between start_date and end_date
            and user_id is not null
            and order_state in ('complete', 'canceled')
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
            and b2b_order_company_flg != 1
            and b2b_measure = 0
            and order_id not in (select order_id from late_cancelled_orders)
            and order_number not in 
            (
                select order_number
                from gp_rep.rep__bi_shipment
                where 1=1
                    and toDate(completed_at) between start_date and end_date + interval 14 day
                    and owner_type = 'ServiceAccount'
            )
        group by dictGet('analytics.spree_users_dict', 'uuid', toUInt64(user_id)) as uuid, order_number, completed_at
        )

    select 
        anonymous_id,
        platform,
        tenant,
    uniqExactIf(order_id, uuid != '' and completed_dt = events_dt) as orders, -- это значение может быть использовано как знаменатель для ratio метрики среднего чека
    max(if(uuid != '' and completed_dt = events_dt, 1, 0)) as conversion,
    sumIf(gmv_net_of_promo, completed_dt = events_dt) as gmv_per_user, -- это значение может быть использовано как числитель для ratio метрики среднего чека   

    minIf(completed_dt, completed_dt = events_dt) as first_order_date,
    if(
        first_order_date is null, 
        null, 
        arrayExists(
            elem -> assumeNotNull(elem <= toDate(end_date) and toDate(elem) > first_order_date), 
            groupArray(toDate(completed_dt))
        )
    ) as conv_to_second_order,   
    arrayExists(elem -> assumeNotNull(elem <= toDate(first_order_date + INTERVAL 14 DAY) and 
        elem > first_order_date), groupArray(toDate(shipped_at))) as ret_14d
    from ab_groups_x_events
    left join (select * from financial_measures where completed_dt >= start_date and completed_dt <= end_date + interval 14 day) financial_measures
        on order_id = order_number
    where tenant = 'sbermarket'
    group by anonymous_id, platform, tenant
   HAVING platform IN {tuple(platforms)}
    """
    return read_sql_query(q)

In [39]:
general_metrics_within_retlist_test_new = get_general_metrics_within_retlist_new(['android'], end_date, end_date, sources_web, sources_new_app)

## Кастомные

In [44]:
#из событийки - product_rendered
def get_custom_metrics_product_rendered(platforms, start_date, end_date, sources_web, sources_new_app):
    q = f"""
    with 
        toDate('{start_date}') as start_date,
        toDate('{end_date}') as end_date,

    filter_events as (
        select
            anonymous_id,
            toDate(min(ts)) as event_dt
        from event.new_app
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and {ios_version_filter}
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id

        UNION ALL 

        select
            toString(anonymous_id) as anonymous_id,
            toDate(min(ts)) as event_dt
        from event.web
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id
    ),

    events as (
        select 
            toString(anonymous_id) as anonymous_id,
            toDate(ts) as dt,
            'web' as platform,
            ifNull(NullIf(tenant, ''), 'sbermarket') as tenant
        from event.web
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and not_bot
            and event in {sources_web}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant

        union all

        select 
            anonymous_id,
            toDate(ts) as dt,
            toString(platform) as platform,
            'sbermarket' as tenant
        from event.new_app
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and event in {sources_new_app}
            and {ios_version_filter}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant
    ),

    

    client_id_x_order_id as (
        select distinct
            date_msk as date,
            uuid,
            anonymous_id,
            if(platform in ('desktop', 'mobile'), 'web', platform) as platform,
            tenant,
            order_id
        from 
            cdm.ab__client_id_x_order_id
        where true 
            and date_msk between start_date and end_date
            and anonymous_id IN (SELECT anonymous_id FROM filter_events)
    ),

    ab_groups_x_events as (
        select distinct
            events.anonymous_id as anonymous_id,
            events.platform as platform,
            events.tenant as tenant,
            client_id_x_order_id.order_id as order_id,
            events.dt as events_dt
        from 
            events

            inner join filter_events 
                on events.anonymous_id = filter_events.anonymous_id

            left join client_id_x_order_id
                on events.anonymous_id = client_id_x_order_id.anonymous_id
                and events.dt = client_id_x_order_id.date
                and events.platform = client_id_x_order_id.platform
                and events.tenant = client_id_x_order_id.tenant

        where true
          and filter_events.event_dt <= events_dt
    ),
    
    t1 as (
    select 
           anonymous_id,
           toDate(ts) as dt,
           toString(platform) as platform,
           order_id,
           event
      from event.new_app
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Product Rendered')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
       and {ios_version_filter} 
       and params['product_sku'] global in (select product_sku from ods.product_hub__product where pack_type = 'Весовой')

    UNION ALL 

    select 
           toString(anonymous_id) AS anonymous_id,
           toDate(ts) as dt,
           'web' as platform,
           order_id,
           event
      from event.web
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Product Rendered')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
       and params['product_sku'] global in (select product_sku from ods.product_hub__product where pack_type = 'Весовой')
    ),
    
    
    t2 as (
    select
        anonymous_id,
        dt,
        platform,
        order_id,
        countIf(event = 'Product Rendered') as product_rendered_count
    from t1
    group by 1,2,3,4
    )

    select
         ab_groups_x_events.platform AS platform,
         ab_groups_x_events.anonymous_id AS anonymous_id,
         case when sum(product_rendered_count) > 0 then 1 else 0 end as product_rendered_user
      from ab_groups_x_events 
            join t2 ON t2.order_id = ab_groups_x_events.order_id
                    and t2.dt = ab_groups_x_events.events_dt
                    and t2.anonymous_id = ab_groups_x_events.anonymous_id
                    and toString(t2.platform) = toString(ab_groups_x_events.platform)
     where platform IN {tuple(platforms)}
     GROUP BY 1,2
    SETTINGS enable_optimize_predicate_expression=0
    """
    return read_sql_query(q)

In [45]:
custom_metrics_product_rendered = get_custom_metrics_product_rendered(['android'], end_date, end_date, sources_web, sources_new_app)

In [46]:
custom_metrics_product_rendered

Unnamed: 0,platform,anonymous_id,product_rendered_user
0,android,938d1e1f7a1cbba2,1
1,android,ddd0073fa56f0e3f,1
2,android,3845f02dbe549dd9,1
3,android,66ada4772c83dbc7,1
4,android,bad0de215c6b284c,1
...,...,...,...
156473,android,56c955e563c281e6,1
156474,android,4627c08f0d6398d8,1
156475,android,96eb91d4235f52a0,1
156476,android,71370a34f96624c9,1


In [23]:
#из событийки - добавления в корзину
def get_custom_metrics_add2c(platforms, start_date, end_date, sources_web, sources_new_app):
    q = f"""
    with 
        toDate('{start_date}') as start_date,
        toDate('{end_date}') as end_date,

   filter_events as (
        select
            anonymous_id,
            toDate(min(ts)) as event_dt
        from event.new_app
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and {ios_version_filter}
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id

        UNION ALL 

        select
            toString(anonymous_id) as anonymous_id,
            toDate(min(ts)) as event_dt
        from event.web
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id
    ),

    events as (
        select 
            toString(anonymous_id) as anonymous_id,
            toDate(ts) as dt,
            'web' as platform,
            ifNull(NullIf(tenant, ''), 'sbermarket') as tenant
        from event.web
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and not_bot
            and event in {sources_web}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant

        union all

        select 
            anonymous_id,
            toDate(ts) as dt,
            toString(platform) as platform,
            'sbermarket' as tenant
        from event.new_app
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and event in {sources_new_app}
            and {ios_version_filter}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant
    ),

    

    client_id_x_order_id as (
        select distinct
            date_msk as date,
            uuid,
            anonymous_id,
            if(platform in ('desktop', 'mobile'), 'web', platform) as platform,
            tenant,
            order_id
        from 
            cdm.ab__client_id_x_order_id
        where true 
            and date_msk between start_date and end_date
            and anonymous_id IN (SELECT anonymous_id FROM filter_events)
    ),

    ab_groups_x_events as (
        select distinct
            events.anonymous_id as anonymous_id,
            events.platform as platform,
            events.tenant as tenant,
            client_id_x_order_id.order_id as order_id,
            events.dt as events_dt
        from 
            events

            inner join filter_events 
                on events.anonymous_id = filter_events.anonymous_id

            left join client_id_x_order_id
                on events.anonymous_id = client_id_x_order_id.anonymous_id
                and events.dt = client_id_x_order_id.date
                and events.platform = client_id_x_order_id.platform
                and events.tenant = client_id_x_order_id.tenant

        where true
          and filter_events.event_dt <= events_dt
    ),

    t1 as (
    select 
           anonymous_id,
           toDate(ts) as dt,
           toString(platform) as platform,
           order_id,
           event
      from event.new_app
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Product Added')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
       and {ios_version_filter} 
       and params['product_sku'] global in (select product_sku from ods.product_hub__product where pack_type = 'Весовой')

    UNION ALL 

    select 
           toString(anonymous_id) AS anonymous_id,
           toDate(ts) as dt,
           'web' as platform,
           order_id,
           event
      from event.web
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Product Added')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
       and params['product_sku'] global in (select product_sku from ods.product_hub__product where pack_type = 'Весовой')
    ),

    t2 as (
    select
        anonymous_id,
        dt,
        platform,
        order_id,
        countIf(event = 'Product Added') as product_added_count
    from t1
    group by 1,2,3,4
    )

    select
         ab_groups_x_events.platform AS platform,
         ab_groups_x_events.anonymous_id AS anonymous_id,
         case when sum(product_added_count) > 0 then 1 else 0 end as product_added_user,
         sum(product_added_count) as product_added_ratio
      from ab_groups_x_events 
            join t2 ON t2.order_id = ab_groups_x_events.order_id
                    and t2.dt = ab_groups_x_events.events_dt
                    and t2.anonymous_id = ab_groups_x_events.anonymous_id
                    and toString(t2.platform) = toString(ab_groups_x_events.platform)
     where platform IN {tuple(platforms)}
     GROUP BY 1,2
    SETTINGS enable_optimize_predicate_expression=0
    """
    return read_sql_query(q)

In [32]:
custom_metrics_add2c = get_custom_metrics_add2c(['android'], end_date, end_date, sources_web, sources_new_app)

In [33]:
custom_metrics_add2c

Unnamed: 0,platform,anonymous_id,product_added_user,product_added_ratio
0,android,cce0f3e69f31170e,1,5
1,android,5ed86a056fd13a3f,1,6
2,android,4d2978b62fd1c8a8,1,10
3,android,a34beca082964ef7,1,1
4,android,c00e3d0f145a1a45,1,2
...,...,...,...,...
58602,android,674979e5e2cdecdf,1,1
58603,android,f3dea7d4817d8bc0,1,1
58604,android,85fd018eaefb72ee,1,1
58605,android,ca5c42fe01afff40,1,3


In [24]:
#средний чек и среднее количество товаров - для весовых
def get_aov_and_avg_items(platforms, start_date, end_date, sources_web, sources_new_app):
    q = f"""
    with 
        toDate('{start_date}') as start_date,
        toDate('{end_date}') as end_date,

    filter_events as (
        select
            anonymous_id,
            toDate(min(ts)) as event_dt
        from event.new_app
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and {ios_version_filter}
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id

        UNION ALL 

        select
            toString(anonymous_id) as anonymous_id,
            toDate(min(ts)) as event_dt
        from event.web
        where 1=1
            and event = {event_filter}
            and dwh_dt between start_date and end_date
            and toDate(ts) between start_date and end_date
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id
    ),

    events as (
        select 
            toString(anonymous_id) as anonymous_id,
            toDate(ts) as dt,
            'web' as platform,
            ifNull(NullIf(tenant, ''), 'sbermarket') as tenant
        from event.web
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and not_bot
            and event in {sources_web}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant

        union all

        select 
            anonymous_id,
            toDate(ts) as dt,
            toString(platform) as platform,
            'sbermarket' as tenant
        from event.new_app
        where 1=1
            and toDate(ts) between start_date and end_date
            and dwh_dt between start_date and end_date
            and event in {sources_new_app}
            and {ios_version_filter}
            and anonymous_id GLOBAL IN (SELECT anonymous_id FROM filter_events)
            and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
        group by anonymous_id, dt, platform, tenant
    ),

    

    client_id_x_order_id as (
        select distinct
            date_msk as date,
            uuid,
            anonymous_id,
            if(platform in ('desktop', 'mobile'), 'web', platform) as platform,
            tenant,
            order_id
        from 
            cdm.ab__client_id_x_order_id
        where true 
            and date_msk between start_date and end_date
            and anonymous_id IN (SELECT anonymous_id FROM filter_events)
    ),

    ab_groups_x_events as (
        select distinct
            events.anonymous_id as anonymous_id,
            events.platform as platform,
            events.tenant as tenant,
            client_id_x_order_id.order_id as order_id,
            events.dt as events_dt
        from 
            events

            inner join filter_events 
                on events.anonymous_id = filter_events.anonymous_id

            left join client_id_x_order_id
                on events.anonymous_id = client_id_x_order_id.anonymous_id
                and events.dt = client_id_x_order_id.date
                and events.platform = client_id_x_order_id.platform
                and events.tenant = client_id_x_order_id.tenant

        where true
          and filter_events.event_dt <= events_dt
    ),

    t1 as (
    select 
           anonymous_id,
           toDate(ts) as dt,
           toString(platform) as platform,
           order_id,
           event
      from event.new_app
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Order Completed')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
       and {ios_version_filter} 

    UNION ALL 

    select 
           toString(anonymous_id) AS anonymous_id,
           toDate(ts) as dt,
           'web' as platform,
           order_id,
           event
      from event.web
     where 1 = 1
       and toDate(ts) between start_date and end_date
       and dwh_dt between start_date and end_date + {received_at_window}
       and event in ('Order Completed')
       and retailer_id GLOBAL in (select retailer_id from analytics.int_retailer_verticals_dict where vertical = 0)
    ),

    t2 as (
    SELECT 
        order_number,
        dt as created_at,
        sum(gmv_final) as gmv_final,
        sum(found_quantity) as found_quantity
        FROM sandbox.damage_orders_items
    where 1 = 1
        and dt between start_date and end_date
        and pricer_definition = 'Весовой'
        and toString(order_number) in (select order_id from t1)
    group by 1,2
    )

    select
         ab_groups_x_events.platform AS platform,
         ab_groups_x_events.anonymous_id AS anonymous_id,
         count(distinct order_number) as orders_for_aov,
         sum(gmv_final) as gmv_final,
         sum(found_quantity) as found_quantity
      from ab_groups_x_events a 
            join t2 b ON a.order_id = toString(b.order_number) and a.events_dt = b.created_at
     where platform IN {tuple(platforms)}
     GROUP BY 1,2
    SETTINGS enable_optimize_predicate_expression=0
    """
    return read_sql_query(q)

In [35]:
aov_and_avg_items = get_aov_and_avg_items(['android'], end_date, end_date, sources_web, sources_new_app)

In [36]:
aov_and_avg_items

Unnamed: 0,platform,anonymous_id,orders_for_aov,gmv_final,found_quantity
0,android,48abe913fe4f591a,1,116.750000,0.940000
1,android,5c86160c9db42e32,1,65.989998,1.100000
2,android,39e2a438f7ad0bf5,1,223.669998,3.400000
3,android,ef369210c1fc5cfa,1,1138.960003,11.882116
4,android,685eefc4b62277a1,1,282.859989,4.300000
...,...,...,...,...,...
26804,android,4ebf3def140e7252,1,152.220001,1.178000
26805,android,f51eb4457d1bb9a3,1,740.810013,6.864000
26806,android,303c2faa51fd343c,1,633.849983,6.010105
26807,android,253a67fd6d8a8d9f,1,806.189987,8.573333


# MDE

## Data Collection

### Базовые с фильтрацией

In [54]:
general_metrics_data_within_retlist = {}
#lengths = [2,3,4]
lengths = [4] #ПОТОМ ПОМЕНЯТЬ!
for l in tqdm(lengths):
    name = f"{l}w"
    tmp = get_general_metrics_within_retlist_new(platforms, end_date - dt.timedelta(weeks = l), end_date, sources_web, sources_new_app)
    general_metrics_data_within_retlist[name] = tmp

  0%|          | 0/1 [11:33<?, ?it/s]


KeyboardInterrupt: 

In [95]:
dump_data(general_metrics_data_within_retlist, dump_files_path+'general_metrics_data_within_retlist.pickle')

In [47]:
#Кастомные метрики - product_rendered
custom_metrics_product_rendered_data = {}
tmp1 = get_custom_metrics_product_rendered(platforms, end_date - dt.timedelta(weeks = 2), end_date, sources_web, sources_new_app)
tmp2 = get_custom_metrics_product_rendered(platforms, end_date - dt.timedelta(weeks = 3), end_date - dt.timedelta(weeks = 2), sources_web, sources_new_app)
tmp3 = get_custom_metrics_product_rendered(platforms, end_date - dt.timedelta(weeks = 4), end_date - dt.timedelta(weeks = 3), sources_web, sources_new_app)

In [48]:
pdlist = [tmp1, tmp2, tmp3]  # List of your dataframes
new_df = pd.concat(pdlist)
new_df

Unnamed: 0,platform,anonymous_id,product_rendered_user
0,android,5ec861ab7df8bee6,1
1,android,8fc7e4db068e7901,1
2,android,ed11c7291d14865c,1
3,android,bf5f9a261d735ae7,1
4,android,2a20a4820d4ac68b,1
...,...,...,...
771170,android,c9ff61f6c9160d28,1
771171,android,c075884033a8436b,1
771172,android,7b4ec802310724d5,1
771173,android,42de462610fd9f95,1


In [50]:
tmp = new_df[new_df.product_rendered_user > 0]

In [52]:
name = '4w'
custom_metrics_product_rendered_data[name] = tmp

In [25]:
#Кастомные метрики - product_rendered
custom_metrics_product_rendered_data = {}
#lengths = [2,3,4]
lengths = [4]
for l in tqdm(lengths):
    name = f"{l}w"
    tmp = get_custom_metrics_product_rendered(platforms, end_date - dt.timedelta(weeks = l), end_date, sources_web, sources_new_app)
    custom_metrics_product_rendered_data[name] = tmp

  0%|          | 0/1 [07:43<?, ?it/s]


DatabaseException: Orig exception: Code: 394.
DB::Exception: Query was cancelled. Stack trace:

0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000c623c97 in /usr/bin/clickhouse
1. DB::Exception::Exception<char const (&) [20]>(int, char const (&) [20]) @ 0x000000000749e4a0 in /usr/bin/clickhouse
2. DB::PipelineExecutor::finalizeExecution() @ 0x000000001319c0c7 in /usr/bin/clickhouse
3. DB::PipelineExecutor::execute(unsigned long, bool) @ 0x000000001319bd90 in /usr/bin/clickhouse
4. void std::__function::__policy_invoker<void ()>::__call_impl<std::__function::__default_alloc_func<ThreadFromGlobalPoolImpl<true>::ThreadFromGlobalPoolImpl<DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0>(DB::PullingAsyncPipelineExecutor::pull(DB::Chunk&, unsigned long)::$_0&&)::'lambda'(), void ()>>(std::__function::__policy_storage const*) @ 0x00000000131a954f in /usr/bin/clickhouse
5. void* std::__thread_proxy[abi:v15000]<std::tuple<std::unique_ptr<std::__thread_struct, std::default_delete<std::__thread_struct>>, void ThreadPoolImpl<std::thread>::scheduleImpl<void>(std::function<void ()>, Priority, std::optional<unsigned long>, bool)::'lambda0'()>>(void*) @ 0x000000000c704bc4 in /usr/bin/clickhouse
6. start_thread @ 0x00000000000076db in /lib/x86_64-linux-gnu/libpthread-2.27.so
7. ? @ 0x000000000012161f in /lib/x86_64-linux-gnu/libc-2.27.so


In [69]:
dump_data(custom_metrics_product_rendered_data, dump_files_path+'custom_metrics_product_rendered_data.pickle')

In [None]:
#Кастомные метрики - добавления в корзину
custom_metrics_add2c_data = {}
#lengths = [2,3,4]
lengths = [4]
for l in tqdm(lengths):
    name = f"{l}w"
    tmp = get_custom_metrics_add2c(platforms, end_date - dt.timedelta(weeks = l), end_date, sources_web, sources_new_app)
    custom_metrics_add2c_data[name] = tmp

In [71]:
dump_data(custom_metrics_add2c_data, dump_files_path+'custom_metrics_add2c_data.pickle')

In [None]:
#Кастомные метрики - чек до сборки и после сборки
aov_and_avg_items_data = {}
#lengths = [2,3,4]
lengths = [4]
for l in tqdm(lengths):
    name = f"{l}w"
    tmp = get_aov_and_avg_items(platforms, end_date - dt.timedelta(weeks = l), end_date, sources_web, sources_new_app)
    aov_and_avg_items_data[name] = tmp

In [74]:
dump_data(aov_and_avg_items_data, dump_files_path+'aov_and_avg_items_data.pickle')

## Data pre-processing

In [55]:
general_metrics_data_within_retlist = load_data(dump_files_path+'general_metrics_data_within_retlist.pickle')

In [None]:
custom_metrics_product_rendered_data = load_data(dump_files_path+'custom_metrics_product_rendered_data.pickle')

In [56]:
custom_metrics_add2c_data = load_data(dump_files_path+'custom_metrics_add2c_data.pickle')

In [57]:
aov_and_avg_items_data = load_data(dump_files_path+'aov_and_avg_items_data.pickle')

## MDE Estimation

In [58]:
lengths = [4] #weeks 
final_data = {}
for l in tqdm(lengths):
    name = f"{l}w"
    general_metrics_data_within_retlist_tmp = general_metrics_data_within_retlist[name].copy()
    custom_metrics_product_rendered_data_tmp = custom_metrics_product_rendered_data[name].set_index(['anonymous_id']).reset_index().copy()
    custom_metrics_add2c_data_tmp = custom_metrics_add2c_data[name].set_index(['anonymous_id']).reset_index().copy()
    aov_and_avg_items_data_tmp = aov_and_avg_items_data[name].set_index(['anonymous_id']).reset_index().copy()
    out = (
        general_metrics_data_within_retlist_tmp
        .merge(custom_metrics_add2c_data_tmp, on = ['anonymous_id', 'platform'],how = 'outer')
        .merge(custom_metrics_product_rendered_data_tmp, on = ['anonymous_id', 'platform'],how = 'outer')
        .merge(aov_and_avg_items_data_tmp, on = ['anonymous_id', 'platform'],how = 'outer')
    )
    final_data[name] = out

100%|██████████| 1/1 [00:08<00:00,  8.96s/it]


In [97]:
final_data['2w'] = final_data['2w'].query('product_rendered_user == 1')
final_data['3w'] = final_data['3w'].query('product_rendered_user == 1')

final_data['2w'] = final_data['2w'].fillna(0)
final_data['3w'] = final_data['3w'].fillna(0)

In [60]:
final_data['4w'] = final_data['4w'].query('product_rendered_user == 1')

final_data['4w'] = final_data['4w'].fillna(0)

In [61]:
res = {
       "length": [], 
       "traffic_proportion": [], 
       "alpha":[], 
       "nobs":[],
       "est":[],
       "mde_abs": [], 
       "mde_percent": [],
       "metric": []
      }

In [62]:
data = final_data.copy()

In [63]:
# format: (num, denum), if denum is None => average metric
metrics = [
    ('conversion',None), # Конверсия в заказ 
    ('gmv_per_user','orders'), # Средний чек по платящим пользователям
    ('ret_14d',None), # Retention
    ("gmv_per_user", None), # GMV per user
    ("product_added_user", "product_rendered_user"), # Конверсия в корзину весового товара из сниппета весового товара
    ("gmv_final", "orders_for_aov"), # AOV для весовых товаров
    ("product_added_ratio", None), # Среднее кол-во весовых товаров, добавленных в корзину
    ("found_quantity", None) # Среднее кол-во весовых товаров в заказе
]

In [64]:
from statsmodels.stats.power import tt_ind_solve_power 

In [65]:
ratio = 1
power = 0.8
for length in data.keys():
    for traffic_proportion in traffic_proportions:
        for metric in tqdm(metrics):
            df = data[length].sample(frac=traffic_proportion)
            num_col = metric[0]
            denum_col = metric[1]
            
            # preprocessing
            if denum_col is not None: 
                df = df[df[denum_col]>0]
            else:
                df = df[df[num_col].notna()]

            df = df.sample(frac=0.5) # 50 на 50

            df = drop_outliers(df)
            
            nobs_test = df[num_col].shape[0]

            if df.shape[0]>1:
                if denum_col:
                    num = df[num_col]
                    denum = df[denum_col]
                    est, sd = deltamethod(denum, num, independent=False, bc=True)
                else:
                    est = df[num_col].mean()
                    sd = df[num_col].std()
                                    
                for threshold in thresholds:
                    alpha = threshold
                    effect_size =  tt_ind_solve_power(power=power, nobs1=nobs_test, alpha=alpha, ratio=ratio)
                    mde_percent = effect_size*sd/est
                    mde_abs = mde_percent*est
                    res["length"].append(length)
                    res["traffic_proportion"].append(traffic_proportion)
                    res["alpha"].append(alpha)
                    res["nobs"].append(nobs_test)
                    res["est"].append(est)
                    res["mde_abs"].append(mde_abs)
                    res["mde_percent"].append(mde_percent)
                    res["metric"].append(metric)
            else:
                    res["length"].append(length)
                    res["traffic_proportion"].append(traffic_proportion)
                    res["alpha"].append(alpha)
                    res["nobs"].append(nobs_test)
                    res["est"].append(-1)
                    res["mde_abs"].append(-1)
                    res["mde_percent"].append(-1)
                    res["metric"].append(metric)
                

100%|██████████| 8/8 [00:05<00:00,  1.48it/s]
100%|██████████| 8/8 [00:10<00:00,  1.28s/it]
100%|██████████| 8/8 [00:20<00:00,  2.58s/it]
100%|██████████| 8/8 [00:32<00:00,  4.02s/it]
100%|██████████| 8/8 [00:41<00:00,  5.15s/it]


In [66]:
mde_df = pd.DataFrame(res)

In [67]:
mde_df = pd.DataFrame(res)
mde_df["mde_abs"] = mde_df["mde_abs"].round(4)
mde_df["mde_percent"] = (mde_df["mde_percent"]*100).round(2).astype(str) + "%"

In [68]:
mde_df

Unnamed: 0,length,traffic_proportion,alpha,nobs,est,mde_abs,mde_percent,metric
0,4w,0.125,0.05,168071,0.661595,0.0046,0.69%,"(conversion, None)"
1,4w,0.125,0.01,168071,0.661595,0.0056,0.84%,"(conversion, None)"
2,4w,0.125,0.05,111448,2007.307483,19.3244,0.96%,"(gmv_per_user, orders)"
3,4w,0.125,0.01,111448,2007.307483,23.5742,1.17%,"(gmv_per_user, orders)"
4,4w,0.125,0.05,168069,0.419346,0.0048,1.14%,"(ret_14d, None)"
...,...,...,...,...,...,...,...,...
75,4w,1.000,0.01,498625,371.698582,3.3174,0.89%,"(gmv_final, orders_for_aov)"
76,4w,1.000,0.05,1344230,8.445352,0.0594,0.7%,"(product_added_ratio, None)"
77,4w,1.000,0.01,1344230,8.445352,0.0725,0.86%,"(product_added_ratio, None)"
78,4w,1.000,0.05,1344198,3.880515,0.0399,1.03%,"(found_quantity, None)"


In [69]:
mde_df[mde_df.mde_abs == 0]

Unnamed: 0,length,traffic_proportion,alpha,nobs,est,mde_abs,mde_percent,metric


In [70]:
metric_names = {
    "('conversion', None)":'Конверсия в заказ',
    "('gmv_per_user', 'orders')":'Средний чек',
    "('ret_14d', None)":'Ретеншн 14d',
    "('gmv_per_user', None)" :'GMV per user',
    "('conv_to_second_order', None)" :'Конверсия в повторный заказ',
    "('product_added_user', 'product_rendered_user')" :'Конверсия в корзину из просмотра сниппета для весовых товаров',
    "('gmv_initial', 'orders_for_aov')" :'Средний чек до сборки',
    "('gmv_final', 'orders_for_aov')" :'AOV для весовых товаров',
    "('product_added_ratio', None)" :'Среднее количество весовых товаров, добавленных в корзину',
    "('found_quantity', None)" :'Среднее количество весовых товаров в заказе',
}

In [71]:
mde_df['metric_name'] = mde_df.metric.astype(str).apply(lambda x: metric_names[x])

In [72]:
mde_df

Unnamed: 0,length,traffic_proportion,alpha,nobs,est,mde_abs,mde_percent,metric,metric_name
0,4w,0.125,0.05,168071,0.661595,0.0046,0.69%,"(conversion, None)",Конверсия в заказ
1,4w,0.125,0.01,168071,0.661595,0.0056,0.84%,"(conversion, None)",Конверсия в заказ
2,4w,0.125,0.05,111448,2007.307483,19.3244,0.96%,"(gmv_per_user, orders)",Средний чек
3,4w,0.125,0.01,111448,2007.307483,23.5742,1.17%,"(gmv_per_user, orders)",Средний чек
4,4w,0.125,0.05,168069,0.419346,0.0048,1.14%,"(ret_14d, None)",Ретеншн 14d
...,...,...,...,...,...,...,...,...,...
75,4w,1.000,0.01,498625,371.698582,3.3174,0.89%,"(gmv_final, orders_for_aov)",AOV для весовых товаров
76,4w,1.000,0.05,1344230,8.445352,0.0594,0.7%,"(product_added_ratio, None)","Среднее количество весовых товаров, добавленны..."
77,4w,1.000,0.01,1344230,8.445352,0.0725,0.86%,"(product_added_ratio, None)","Среднее количество весовых товаров, добавленны..."
78,4w,1.000,0.05,1344198,3.880515,0.0399,1.03%,"(found_quantity, None)",Среднее количество весовых товаров в заказе


In [73]:
dump_files_path

'/home/jovyan/AB-tests/Изменение квантов весовых товаров/'

In [74]:
mde_df.drop(columns = ['metric']).to_feather(dump_files_path+'mde_df_4w.feather')

In [75]:
mde_df.drop(columns = ['metric']).to_excel(dump_files_path+'mde_df_4w.xlsx', index=False)