In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

# Directory (relative to the notebook's working directory) from which every
# parquet input in this notebook is read.
DATA_DIR = Path("../data/interim")


In [11]:
# Load the table that later cells filter to purchase rows and rank by date
# per customer. Only this read is needed here: transition_1_to_2.parquet is
# loaded (and bound to T12) in the following cell, so reading it here and
# discarding the result was pure wasted I/O.
df_k = pd.read_parquet(DATA_DIR / "cust_day_group.parquet")



In [12]:
# Load the three consecutive transition tables (1->2, 2->3, 3->4) in one pass.
transition_files = (
    "transition_1_to_2.parquet",
    "transition_2_to_3.parquet",
    "transition_3_to_4.parquet",
)
T12, T23, T34 = (pd.read_parquet(DATA_DIR / fname) for fname in transition_files)


In [13]:
# Row-wise maximum of T12: the largest single value in each entry group's row.
top_share_12 = T12.max(axis="columns")

# Show the ten entry groups with the highest row maximum.
top_share_ranked = top_share_12.sort_values(ascending=False)
top_share_ranked.head(10)


entry_group
24_Filtry przepływowe - wkłady      0.769780
32_nabój CO2 - wymiana              0.639483
26_podgrzewacze przepływowe         0.613861
27_Proskin - urządzenia i filtry    0.597847
19_Flow Comfort D1 trójdrożna       0.592121
21_wkład D1 do Flow Comfort         0.561612
23_Filtry przepływowe - obudowy     0.560467
16_filtry do dzbanków AGD+          0.535449
22_wkład DC10 do Flow Comfort       0.518315
17_Flow Comfort D1 jednodrożna      0.464698
dtype: float64

In [19]:
# Build the purchase-only table and number each customer's purchases by date.

# Keep only purchase rows; .copy() so the column assignments below operate on
# an independent frame rather than a slice of the loaded table.
df_k = df_k[df_k["is_purchase"] == True].copy()

# Parse the date column exactly once. Values that cannot be parsed become NaT
# instead of raising.
df_k["date"] = pd.to_datetime(df_k["date"], dayfirst=True, errors="coerce")

# Drop missing dates AFTER parsing, so that values coerced to NaT are removed
# together with dates that were missing to begin with. If NaT rows survived,
# rank() would return NaN for them and the .astype(int) below would raise.
df_k = df_k.dropna(subset=["date"])

# Purchase number per customer: dense rank of the date within each "anon".
# Rows of one customer sharing a date get the same number, and numbers are
# consecutive (1, 2, 3, ...). rank() does not depend on row order and returns
# a result aligned to df_k's index, so no prior sort is needed.
df_k["purchase_k"] = (
    df_k
    .groupby("anon")["date"]
    .rank(method="dense")
    .astype(int)
)


In [21]:
# Number of distinct customers whose first purchase (purchase_k == 1)
# includes each product group, largest groups first.
is_first_purchase = df_k["purchase_k"] == 1
first_purchase_groups = (
    df_k[is_first_purchase][["anon", "MATRIX GRUPA PRODUKTOWA"]]
    .drop_duplicates()
)

customers_per_group = (
    first_purchase_groups
    .groupby("MATRIX GRUPA PRODUKTOWA")["anon"]
    .nunique()
)

entry_support = (
    customers_per_group
    .rename("n_customers")
    .sort_values(ascending=False)
)

entry_support.head(10)


MATRIX GRUPA PRODUKTOWA
06_filtry do butelek Soft i Solid    110942
03_butelki filtrujące SOLID           91701
13_filtry do dzbanków standard        68321
08_dzbanki filtrujące manualne        56071
10_dzbanki filtrujące Crystal         41610
02_butelki filtrujące SOFT            30626
07_akcesoria do Soft/Solid            28331
38_inne                               13482
16_filtry do dzbanków AGD+            12608
09_dzbanki filtrujące LED             11763
Name: n_customers, dtype: int64

In [22]:
# Row-wise maximum of T12, named so it becomes the "top_share" column below.
top_share_12 = T12.max(axis="columns").rename("top_share")

# Put top share and entry support side by side (aligned on the index), keep
# only groups that have both values, and order by entry volume.
kpi_columns = [top_share_12, entry_support]
kpi_12 = pd.concat(kpi_columns, axis="columns").dropna()
kpi_12 = kpi_12.sort_values("n_customers", ascending=False)

kpi_12.head(10)


Unnamed: 0,top_share,n_customers
06_filtry do butelek Soft i Solid,0.41776,110942
03_butelki filtrujące SOLID,0.364796,91701
13_filtry do dzbanków standard,0.379161,68321
08_dzbanki filtrujące manualne,0.363807,56071
10_dzbanki filtrujące Crystal,0.317696,41610
02_butelki filtrujące SOFT,0.313916,30626
07_akcesoria do Soft/Solid,0.3121,28331
38_inne,0.245131,13482
16_filtry do dzbanków AGD+,0.535449,12608
09_dzbanki filtrujące LED,0.225808,11763


In [23]:
# Cut-offs for the gateway flag: the medians of entry support and of top share.
support_threshold, top_share_threshold = (
    entry_support.median(),
    top_share_12.median(),
)

(support_threshold, top_share_threshold)


(2319.0, 0.31769608634908975)

In [24]:
# A group is a gateway candidate when it meets BOTH median cut-offs:
# enough entry customers and a high enough top share.
has_enough_support = kpi_12["n_customers"] >= support_threshold
has_high_top_share = kpi_12["top_share"] >= top_share_threshold

kpi_12["gateway_candidate"] = has_enough_support & has_high_top_share


In [25]:
# Gateway candidates first, then by entry volume, then by top share
# (all three keys descending).
ranking_keys = ["gateway_candidate", "n_customers", "top_share"]
kpi_12.sort_values(by=ranking_keys, ascending=False).head(15)


Unnamed: 0,top_share,n_customers,gateway_candidate
06_filtry do butelek Soft i Solid,0.41776,110942,True
03_butelki filtrujące SOLID,0.364796,91701,True
13_filtry do dzbanków standard,0.379161,68321,True
08_dzbanki filtrujące manualne,0.363807,56071,True
10_dzbanki filtrujące Crystal,0.317696,41610,True
16_filtry do dzbanków AGD+,0.535449,12608,True
35_pojemniki SeeYou,0.435802,7163,True
24_Filtry przepływowe - wkłady,0.76978,7150,True
21_wkład D1 do Flow Comfort,0.561612,2319,True
02_butelki filtrujące SOFT,0.313916,30626,False


In [26]:
# For every row label of T12, pick the value in the column carrying the same
# label: reorder T12 into a square block indexed by its own row labels on
# both axes, then read that block's diagonal.
entry_groups = T12.index
square_block = T12.loc[entry_groups, entry_groups].values

same_category_12 = pd.Series(
    np.diag(square_block),
    index=entry_groups,
    name="same_category",
)


In [27]:
# Attach the same-category value to the KPI table (left join on the index).
# Any existing "same_category" column is dropped first: DataFrame.join raises
# on overlapping column names, so without this the cell fails when re-run.
# errors="ignore" makes the drop a no-op on the first execution.
kpi_12 = (
    kpi_12
    .drop(columns="same_category", errors="ignore")
    .join(same_category_12)
)
kpi_12.head(10)


Unnamed: 0,top_share,n_customers,gateway_candidate,same_category
06_filtry do butelek Soft i Solid,0.41776,110942,True,0.41776
03_butelki filtrujące SOLID,0.364796,91701,True,0.364796
13_filtry do dzbanków standard,0.379161,68321,True,0.379161
08_dzbanki filtrujące manualne,0.363807,56071,True,0.363807
10_dzbanki filtrujące Crystal,0.317696,41610,True,0.248636
02_butelki filtrujące SOFT,0.313916,30626,False,0.313916
07_akcesoria do Soft/Solid,0.3121,28331,False,0.278569
38_inne,0.245131,13482,False,0.032608
16_filtry do dzbanków AGD+,0.535449,12608,True,0.535449
09_dzbanki filtrujące LED,0.225808,11763,False,0.225808


In [35]:
# Split gateway candidates by how often the same-category value falls below
# its median across groups; everything else is labelled "non_gateway".
same_threshold = kpi_12["same_category"].median()

is_candidate = kpi_12["gateway_candidate"]
below_same_threshold = kpi_12["same_category"] < same_threshold

# Label a candidate would receive: "push" when below the median, else "repeat".
candidate_label = np.where(below_same_threshold, "gateway_push", "gateway_repeat")

# Non-candidates never take a candidate label.
kpi_12["gateway_type"] = np.where(is_candidate, candidate_label, "non_gateway")

# Group the rows by label (alphabetical), largest entry volume first within each.
kpi_12.sort_values(
    by=["gateway_type", "n_customers"],
    ascending=[True, False],
)



Unnamed: 0,top_share,n_customers,gateway_candidate,same_category,gateway_type
06_filtry do butelek Soft i Solid,0.41776,110942,True,0.41776,gateway_repeat
03_butelki filtrujące SOLID,0.364796,91701,True,0.364796,gateway_repeat
13_filtry do dzbanków standard,0.379161,68321,True,0.379161,gateway_repeat
08_dzbanki filtrujące manualne,0.363807,56071,True,0.363807,gateway_repeat
10_dzbanki filtrujące Crystal,0.317696,41610,True,0.248636,gateway_repeat
16_filtry do dzbanków AGD+,0.535449,12608,True,0.535449,gateway_repeat
35_pojemniki SeeYou,0.435802,7163,True,0.435802,gateway_repeat
24_Filtry przepływowe - wkłady,0.76978,7150,True,0.76978,gateway_repeat
21_wkład D1 do Flow Comfort,0.561612,2319,True,0.561612,gateway_repeat
02_butelki filtrujące SOFT,0.313916,30626,False,0.313916,non_gateway


In [38]:
# Complement of the same-category value.
same_share = kpi_12["same_category"]
kpi_12["cross_category"] = 1 - same_share

# Sort descending on both keys and show the last 15 rows, i.e. the groups
# with the lowest cross-category value.
kpi_by_cross = kpi_12.sort_values(
    by=["cross_category", "n_customers"],
    ascending=False,
)
kpi_by_cross.tail(15)



Unnamed: 0,top_share,n_customers,gateway_candidate,same_category,gateway_type,cross_category
15_filtry do dzbanków Ph+,0.298923,1250,False,0.298923,non_gateway,0.701077
14_filtry do dzbanków Mg+,0.304843,9189,False,0.304843,non_gateway,0.695157
02_butelki filtrujące SOFT,0.313916,30626,False,0.313916,non_gateway,0.686084
08_dzbanki filtrujące manualne,0.363807,56071,True,0.363807,gateway_repeat,0.636193
03_butelki filtrujące SOLID,0.364796,91701,True,0.364796,gateway_repeat,0.635204
13_filtry do dzbanków standard,0.379161,68321,True,0.379161,gateway_repeat,0.620839
06_filtry do butelek Soft i Solid,0.41776,110942,True,0.41776,gateway_repeat,0.58224
35_pojemniki SeeYou,0.435802,7163,True,0.435802,gateway_repeat,0.564198
22_wkład DC10 do Flow Comfort,0.518315,1376,False,0.518315,non_gateway,0.481685
16_filtry do dzbanków AGD+,0.535449,12608,True,0.535449,gateway_repeat,0.464551
