### В конце каждого месяца компания выдаёт премию тем курьерам, чья средняя скорость доставки за прошедший месяц выше средней скорости среди всех курьеров. Сколько курьеров получили премию за июнь 2021 года?

In [1]:
import pandas as pd

#### Step 1. Loading and filtering data

In [2]:
# Load the courier order history from the parquet file into `df`.
# Only the read itself sits inside `try`, so an error raised while
# printing is not mistaken for a missing file.
try:
    df = pd.read_parquet("data/couriers_orders.parquet")
except FileNotFoundError as e:
    print(f"Error. {e}")
    # Every later step reads `df`; re-raise so the run stops here with the
    # real cause instead of failing later with an unrelated NameError.
    raise
else:
    print(df)

           date  courier_id  order_id  distance  travel_time
0    2021-07-12          10         1      1.90        36.17
1    2021-07-02           3         2      3.98        21.34
2    2021-04-15           6         3      3.98        43.33
3    2021-07-16          10         4      2.85        14.01
4    2021-06-11          10         5      4.89        32.09
...         ...         ...       ...       ...          ...
1661 2021-04-05           4      1662      3.07        14.54
1662 2021-05-15          10      1663      1.33        23.13
1663 2021-08-29          10      1664      0.77        53.74
1664 2021-05-17           6      1665      3.22        16.45
1665 2021-05-01           1      1666      3.94        46.00

[1666 rows x 5 columns]


In [3]:
# Build one boolean mask over the "date" column that is True only for
# orders dated in June 2021.
order_dates = df["date"]
in_june_2021 = (order_dates.dt.year == 2021) & (order_dates.dt.month == 6)

# Copy the matching rows so that `june_df` is independent of `df`.
june_df = df[in_june_2021].copy()

print(june_df)

           date  courier_id  order_id  distance  travel_time
4    2021-06-11          10         5      4.89        32.09
8    2021-06-14           4         9      4.13        29.34
9    2021-06-27           8        10      1.04        12.56
18   2021-06-27           1        19      1.85        13.56
24   2021-06-28           2        25      4.02        12.43
...         ...         ...       ...       ...          ...
1648 2021-06-01           6      1649      2.09        50.89
1653 2021-06-23           9      1654      4.03        16.62
1655 2021-06-09           6      1656      2.36        37.31
1658 2021-06-28           4      1659      3.22        52.66
1660 2021-06-05           9      1661      3.32        14.96

[290 rows x 5 columns]


#### Step 2. Cleaning June records and adding the "speed_kmh" column

In [4]:
# Keep only rows with strictly positive travel time and distance: a zero
# travel time would make the division below produce inf/NaN.
has_valid_measurements = (june_df["travel_time"] > 0) & (june_df["distance"] > 0)

# Explicit copy: assigning a new column onto a boolean-indexed selection can
# raise SettingWithCopyWarning whenever the filter actually drops rows.
june_df = june_df[has_valid_measurements].copy()

# travel_time / 60 converts the duration to hours.
# NOTE(review): assumes travel_time is in minutes and distance in km — confirm
# against the data source.
june_df["speed_kmh"] = june_df["distance"] / (june_df["travel_time"] / 60)
print(june_df)

           date  courier_id  order_id  distance  travel_time  speed_kmh
4    2021-06-11          10         5      4.89        32.09   9.143035
8    2021-06-14           4         9      4.13        29.34   8.445808
9    2021-06-27           8        10      1.04        12.56   4.968153
18   2021-06-27           1        19      1.85        13.56   8.185841
24   2021-06-28           2        25      4.02        12.43  19.404666
...         ...         ...       ...       ...          ...        ...
1648 2021-06-01           6      1649      2.09        50.89   2.464138
1653 2021-06-23           9      1654      4.03        16.62  14.548736
1655 2021-06-09           6      1656      2.36        37.31   3.795229
1658 2021-06-28           4      1659      3.22        52.66   3.668819
1660 2021-06-05           9      1661      3.32        14.96  13.315508

[290 rows x 6 columns]


#### Step 3. Overall average speed across all couriers

In [5]:
# Mean of the per-order speeds over every June order, regardless of courier.
# Despite the "_df" suffix, the result is a single scalar, not a DataFrame.
june_order_speeds = june_df["speed_kmh"]
whole_avg_speed_df = june_order_speeds.mean()

print(f"Whole average speed: {whole_avg_speed_df:.2f} km/h")

Whole average speed: 6.28 km/h


#### Step 4. Average speed of each courier

In [6]:
# Mean per-order speed for each courier, indexed by courier_id.
mean_speed_by_courier = june_df.groupby("courier_id")["speed_kmh"].mean()

# Turn the courier_id index back into a regular column so the result is a
# two-column DataFrame: courier_id, speed_kmh.
courier_avg_speed_df = mean_speed_by_courier.reset_index()

print(courier_avg_speed_df)

   courier_id  speed_kmh
0           1   6.749325
1           2   6.528423
2           3   6.223554
3           4   7.692341
4           5   5.325696
5           6   6.791953
6           7   5.801655
7           8   6.826372
8           9   4.972335
9          10   6.580480


#### Step 5. Results

In [7]:
# A courier earns the bonus when their mean June speed is strictly greater
# than the overall mean speed.
courier_with_bonus = courier_avg_speed_df[
    courier_avg_speed_df["speed_kmh"] > whole_avg_speed_df
]
courier_with_bonus_cnt = len(courier_with_bonus)

# Rounded copy used for display only; it reuses the selection above instead
# of evaluating the same filter a second time.
courier_with_bonus_list = courier_with_bonus.round(2)

print("=" * 40)
print(f"    {courier_with_bonus_cnt} couriers with bonus")
# The value printed here is the overall mean (the bonus threshold), not the
# mean speed of the bonus couriers, so the label says so.
print(f"    Overall average speed (bonus threshold) is {whole_avg_speed_df:.2f} km/h")
print(f"    List of couriers with bonus:\n{courier_with_bonus_list}")
print("=" * 40)

    6 couriers with bonus
    Their average speed is 6.28 km/h
    List of couriers with bonus:
   courier_id  speed_kmh
0           1       6.75
1           2       6.53
3           4       7.69
5           6       6.79
7           8       6.83
9          10       6.58
