### У какого количества уникальных клиентов разница между текущей и предыдущей покупкой равна 20 дням?

In [85]:
import pandas as pd

#### Step 1. Loading data

In [86]:
# Load the courier orders dataset from the local parquet file.
# A missing file is reported and then re-raised: every later cell needs `df`,
# so swallowing the error here would only resurface as a NameError downstream.
try:
    df = pd.read_parquet("data/couriers_orders.parquet")
except FileNotFoundError as e:
    print(f"{e}")
    raise
else:
    # Preview the first rows only after a successful load.
    print(df.head())

        date  courier_id  order_id  distance  travel_time
0 2021-07-12          10         1      1.90        36.17
1 2021-07-02           3         2      3.98        21.34
2 2021-04-15           6         3      3.98        43.33
3 2021-07-16          10         4      2.85        14.01
4 2021-06-11          10         5      4.89        32.09


#### Step 2. Sorting data

In [87]:
# Show the frame as loaded, then a divider, so the effect of sorting is visible.
print(df.head(n=5), "-" * 60, sep="\n")

# Order rows by courier first and by date within each courier, so that
# neighbouring rows of one courier are chronologically adjacent orders.
sort_keys = ["courier_id", "date"]
df = df.sort_values(by=sort_keys, ascending=True)
print(df.head(n=5))

        date  courier_id  order_id  distance  travel_time
0 2021-07-12          10         1      1.90        36.17
1 2021-07-02           3         2      3.98        21.34
2 2021-04-15           6         3      3.98        43.33
3 2021-07-16          10         4      2.85        14.01
4 2021-06-11          10         5      4.89        32.09
------------------------------------------------------------
           date  courier_id  order_id  distance  travel_time
1330 2021-04-03           1      1331      1.20        39.68
1302 2021-04-04           1      1303      1.23        49.07
346  2021-04-05           1       347      2.32        42.44
277  2021-04-06           1       278      2.23        57.29
1637 2021-04-08           1      1638      2.21        42.41


#### Step 3. Finding the previous order date

In [88]:
# Within each courier, pull the date from the preceding row.
# Relies on `df` already being sorted by courier_id and date; the first row of
# every courier has no predecessor and therefore gets NaT.
dates_by_courier = df.groupby("courier_id")["date"]
df["prev_order_date"] = dates_by_courier.shift(periods=1)
print(df.head(n=5))

           date  courier_id  order_id  distance  travel_time prev_order_date
1330 2021-04-03           1      1331      1.20        39.68             NaT
1302 2021-04-04           1      1303      1.23        49.07      2021-04-03
346  2021-04-05           1       347      2.32        42.44      2021-04-04
277  2021-04-06           1       278      2.23        57.29      2021-04-05
1637 2021-04-08           1      1638      2.21        42.41      2021-04-06


#### Step 4. Calculating the difference in days

In [89]:
# Gap between each order and the same courier's previous order.
# Rows without a previous order (NaT) yield NaT here as well.
days_diff = df["date"].sub(df["prev_order_date"])
print(days_diff.iloc[:7])

1330      NaT
1302   1 days
346    1 days
277    1 days
1637   2 days
1501   1 days
943    2 days
dtype: timedelta64[us]


In [90]:
# Widen the text display so that all columns of the frame fit on one line.
pd.options.display.width = 500

In [91]:
# Keep only the whole-day component of each gap as a numeric column.
# Rows whose gap is NaT (no previous order) end up as NaN.
df = df.assign(days_between_purchases=days_diff.dt.days)
print(df)

           date  courier_id  order_id  distance  travel_time prev_order_date  days_between_purchases
1330 2021-04-03           1      1331      1.20        39.68             NaT                     NaN
1302 2021-04-04           1      1303      1.23        49.07      2021-04-03                     1.0
346  2021-04-05           1       347      2.32        42.44      2021-04-04                     1.0
277  2021-04-06           1       278      2.23        57.29      2021-04-05                     1.0
1637 2021-04-08           1      1638      2.21        42.41      2021-04-06                     2.0
...         ...         ...       ...       ...          ...             ...                     ...
459  2021-08-29          10       460      4.71        28.85      2021-08-29                     0.0
746  2021-08-29          10       747      4.98        58.31      2021-08-29                     0.0
1663 2021-08-29          10      1664      0.77        53.74      2021-08-29               

#### Step 5. Filtering 20-day gaps

In [92]:
# Keep only the orders placed exactly 20 days after the courier's previous one.
# NaN gaps compare as False and are dropped by the mask.
twenty_days_gaps = df.loc[df["days_between_purchases"].eq(20)]
print(twenty_days_gaps)

Empty DataFrame
Columns: [date, courier_id, order_id, distance, travel_time, prev_order_date, days_between_purchases]
Index: []


#### Step 6. Counting couriers with a 20-day gap

In [95]:
# Number of distinct couriers that have at least one 20-day gap.
unique_couriers = twenty_days_gaps["courier_id"].nunique(dropna=True)

# Frame the answer between two rules of '=' characters.
print("=" * 25, f"    {unique_couriers} unique couriers", "=" * 25, sep="\n")

    0 unique couriers
