### Компания хочет понять, насколько равномерно курьеры работают в течение месяца. Для этого нужно найти ID курьера с наибольшей разницей между максимальной и минимальной средней дневной скоростью в июне 2021 года.

In [79]:
import pandas as pd

#### Step 1. Loading and filtering data

In [80]:
# Load the courier orders dataset. A missing file is reported, not raised.
# NOTE: in that case `df` is never assigned, so later cells cannot run.
try:
    df = pd.read_parquet("data/couriers_orders.parquet")
except FileNotFoundError as err:
    print(f"{err}")
else:
    # Preview the first rows only when the load succeeded.
    print(df.head())

        date  courier_id  order_id  distance  travel_time
0 2021-07-12          10         1      1.90        36.17
1 2021-07-02           3         2      3.98        21.34
2 2021-04-15           6         3      3.98        43.33
3 2021-07-16          10         4      2.85        14.01
4 2021-06-11          10         5      4.89        32.09


In [81]:
# Keep only June 2021 orders whose distance and travel time are positive;
# travel_time is later used as a divisor when computing speed.
is_june_2021 = (df["date"].dt.year == 2021) & (df["date"].dt.month == 6)
has_positive_metrics = (df["distance"] > 0) & (df["travel_time"] > 0)

# Apply every condition in one pass and copy *after* the last filter.
# Copying before a second boolean filter leaves the final frame flagged as a
# slice, so adding a column to it can trigger SettingWithCopyWarning.
june_df = df[is_june_2021 & has_positive_metrics].copy()

print(june_df.head())

         date  courier_id  order_id  distance  travel_time
4  2021-06-11          10         5      4.89        32.09
8  2021-06-14           4         9      4.13        29.34
9  2021-06-27           8        10      1.04        12.56
18 2021-06-27           1        19      1.85        13.56
24 2021-06-28           2        25      4.02        12.43


#### Step 2. New column "speed_km/h"

In [82]:
# travel_time is in minutes; convert it to hours so the ratio is km/h.
travel_time_hours = june_df["travel_time"] / 60
june_df["speed_km/h"] = june_df["distance"] / travel_time_hours
print(june_df.head())

         date  courier_id  order_id  distance  travel_time  speed_km/h
4  2021-06-11          10         5      4.89        32.09    9.143035
8  2021-06-14           4         9      4.13        29.34    8.445808
9  2021-06-27           8        10      1.04        12.56    4.968153
18 2021-06-27           1        19      1.85        13.56    8.185841
24 2021-06-28           2        25      4.02        12.43   19.404666


#### Step 3. Average daily speed for a courier

In [83]:
# Average the per-order speeds for each courier on each day, then flatten the
# (courier_id, date) index back into columns and give the value column a
# descriptive name.
daily_avg_speed_df = (
    june_df
    .groupby(["courier_id", "date"])["speed_km/h"]
    .mean()
    .reset_index()
    .rename(columns={"speed_km/h": "daily_avg_speed_km/h"})
)

print(daily_avg_speed_df.head(10))

   courier_id       date  daily_avg_speed_km/h
0           1 2021-06-01             14.159193
1           1 2021-06-02              6.359468
2           1 2021-06-03              1.989529
3           1 2021-06-04              3.360499
4           1 2021-06-05              5.792179
5           1 2021-06-06              7.422680
6           1 2021-06-07             10.111683
7           1 2021-06-08              6.631481
8           1 2021-06-10              3.810409
9           1 2021-06-11             25.342334


#### Step 4. Min-Max daily speed for a courier

In [84]:
# For every courier, find the slowest and the fastest daily average speed.
daily_min_max_speed_df = (
    daily_avg_speed_df
    .groupby("courier_id")
    .agg(
        daily_min_speed=("daily_avg_speed_km/h", "min"),
        daily_max_speed=("daily_avg_speed_km/h", "max"),
    )
    .reset_index()
)

print(daily_min_max_speed_df)

   courier_id  daily_min_speed  daily_max_speed
0           1         1.665531        25.342334
1           2         1.516789        19.404666
2           3         1.253071        13.351279
3           4         0.973960        25.776567
4           5         1.104112        17.651376
5           6         1.403068        25.497186
6           7         1.053556        11.335089
7           8         0.792570        14.946159
8           9         0.754786        13.315508
9          10         1.751201        18.605004


#### Step 5. New column "speed_diff"

In [85]:
# Spread between a courier's fastest and slowest daily average speed.
fastest_daily_speed = daily_min_max_speed_df["daily_max_speed"]
slowest_daily_speed = daily_min_max_speed_df["daily_min_speed"]
daily_min_max_speed_df["speed_diff"] = fastest_daily_speed - slowest_daily_speed

print(daily_min_max_speed_df.head(10))

   courier_id  daily_min_speed  daily_max_speed  speed_diff
0           1         1.665531        25.342334   23.676802
1           2         1.516789        19.404666   17.887877
2           3         1.253071        13.351279   12.098207
3           4         0.973960        25.776567   24.802607
4           5         1.104112        17.651376   16.547264
5           6         1.403068        25.497186   24.094118
6           7         1.053556        11.335089   10.281534
7           8         0.792570        14.946159   14.153590
8           9         0.754786        13.315508   12.560722
9          10         1.751201        18.605004   16.853803


#### Step 6. Courier with max diff

In [86]:
# Rank couriers from the widest to the narrowest daily-speed spread.
courier_min_max_speed_sorted_df = daily_min_max_speed_df.sort_values(
    "speed_diff",
    ascending=False,
)
print(courier_min_max_speed_sorted_df)

# After the descending sort, the first row holds the largest spread.
top_courier = courier_min_max_speed_sorted_df.iloc[0]
print(top_courier)

   courier_id  daily_min_speed  daily_max_speed  speed_diff
3           4         0.973960        25.776567   24.802607
5           6         1.403068        25.497186   24.094118
0           1         1.665531        25.342334   23.676802
1           2         1.516789        19.404666   17.887877
9          10         1.751201        18.605004   16.853803
4           5         1.104112        17.651376   16.547264
7           8         0.792570        14.946159   14.153590
8           9         0.754786        13.315508   12.560722
2           3         1.253071        13.351279   12.098207
6           7         1.053556        11.335089   10.281534
courier_id          4.000000
daily_min_speed     0.973960
daily_max_speed    25.776567
speed_diff         24.802607
Name: 3, dtype: float64


#### Step 7. Results

In [87]:
# Report the courier with the widest spread between daily average speeds.
# Keys inside the double-quoted f-strings use single quotes: reusing the outer
# quote character within a replacement field is a SyntaxError before
# Python 3.12.
# courier_id is formatted with .0f because it is printed as a float
# (top_courier is a float64 row).
print("=" * 40)
print(f"    Courier with id {top_courier['courier_id']:.0f}")
print(f"    Maximum difference: {top_courier['speed_diff']:.2f} km/h")
print(
    f"    Min: {top_courier['daily_min_speed']:.2f} km/h, "
    f"Max: {top_courier['daily_max_speed']:.2f} km/h"
)
print("=" * 40)

    Courier with id 4
    Maximum difference: 24.80 km/h
    Min: 0.97 km/h, Max: 25.78 km/h
